### Data preprocessing

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf

In [2]:
trainData = pd.read_csv('../../data/trainData.csv', header=None)
# AATI = Average Access Time Interval
trainData.columns = ["timestamp","sector_id", "# of blocks"]
trainData.head()

Unnamed: 0,timestamp,sector_id,# of blocks
0,1.652817,7487488,2048
1,1.652824,7489536,2048
2,1.65283,7491584,2048
3,1.652836,7493632,2048
4,1.652842,7495680,2048


In [3]:
testData = pd.read_csv("../../data/testData.csv", header=None)
testData.columns = ["timestamp","sector_id", "# of blocks"]
testData.head()

Unnamed: 0,timestamp,sector_id,# of blocks
0,0.0,303567,7
1,0.0,55590,6
2,0.026214,303574,7
3,0.026214,240840,6
4,0.117964,303581,7


In [4]:
testDataLabel = pd.read_csv("../../data/clustering/testDataClustered.csv")
testDataLabel.columns = ["sector_id", "frequency", "AATI", "time_interval_std","# of blocks", "cluster"]
print(testDataLabel.head())
print(testDataLabel["cluster"].value_counts())

   sector_id  frequency     AATI  time_interval_std  # of blocks  cluster
0     753921      90736  0.45920        23941.33982       544416        1
1     240840      48735  0.88641        24708.56721       292465        1
2     836706      31787  1.29678        28796.58080       195293        1
3     837306      31704  1.29935        28766.62024       192217        1
4     700132      31288  1.15671        17203.72980       247313        3
3    265152
1    184942
0    149575
2    101204
Name: cluster, dtype: int64


In [5]:
trainDataLabel = pd.read_csv("../../data/clustering/trainDataClustered.csv")
trainDataLabel.columns = ["sector_id", "frequency", "AATI", "time_interval_std","# of blocks", "cluster"]
print(trainDataLabel.head())
print(trainDataLabel["cluster"].value_counts())

   sector_id  frequency      AATI  time_interval_std  # of blocks  cluster
0       8488        966  36.08076        20060.26401         7728        3
1     205888        948  36.72951        20038.61273         7584        3
2     206064        948  36.76656        19881.60064         7584        3
3      74328        947  36.76833        20192.84732         7576        3
4      74408        945  36.88340        20118.25940         7560        3
0    697571
1    324263
3    322380
2    242486
Name: cluster, dtype: int64


In [6]:
# Constants
trainLabelSize = trainDataLabel["sector_id"].size
print("trainDataLabel size:", trainLabelSize)

trainDataLabel size: 1586700


In [7]:
trainDataLabel.dtypes

sector_id              int64
frequency              int64
AATI                 float64
time_interval_std    float64
# of blocks            int64
cluster                int64
dtype: object

In [8]:
trainData.dtypes

timestamp      float64
sector_id        int64
# of blocks      int64
dtype: object

In [9]:
trainingYLabelMap = {}

# assuming cluster 3 is hot and others as cold
for i in range(trainLabelSize):
  if trainDataLabel["cluster"][i] == 3:
    trainingYLabelMap[trainDataLabel["sector_id"][i]] = 1
  else:
    trainingYLabelMap[trainDataLabel["sector_id"][i]] = 0

trainData["hot/cold"] = [trainingYLabelMap[sectorId] if sectorId in trainingYLabelMap else np.nan for sectorId in trainData["sector_id"]]


In [10]:
testLabelSize = testDataLabel["sector_id"].size
testYLabelMap = {}
    
# assuming cluster 1 as hot
for i in range(testLabelSize):
  if testDataLabel["cluster"][i] == 1:
    testYLabelMap[testDataLabel["sector_id"][i]] = 1
  else:
    testYLabelMap[testDataLabel["sector_id"][i]] = 0

testData["hot/cold"] = [testYLabelMap[sectorId] if sectorId in testYLabelMap else np.nan for sectorId in testData["sector_id"]]

KeyboardInterrupt: 

In [None]:
assert not np.any(np.isnan(testData["hot/cold"]))

In [None]:
trainData["hot/cold"].value_counts()


1    5544771
0    4141490
Name: hot/cold, dtype: int64

In [None]:
testData["hot/cold"].value_counts()

1    2061887
0    2037467
Name: hot/cold, dtype: int64

In [None]:
# Normalizing number of blocks
# trainBlocksMean = trainData["# of blocks"].mean()
# trainBlocksStd = trainData["# of blocks"].std()

# trainData["# of blocks"] = (trainData["# of blocks"] - trainBlocksMean) / trainBlocksStd

# testBlocksMean = testData["# of blocks"].mean()
# testBlocksStd = testData["# of blocks"].std()

# testData["# of blocks"] = (testData["# of blocks"] - testBlocksMean) / testBlocksStd

In [None]:
def addressToTensor(address):
    addressSize = len(str(address)) 
    fill = [[0] * 10 for i in range(10 - addressSize)]
    arr = [[1 if j == int(char) else 0 for j in range(10)] for i, char in enumerate(str(address))]

    # tensor = tf.Variable(fill + arr)
    # res = np.asarray(fill+arr)
    return fill + arr

In [None]:
addressToTensor(1058376838)

[[0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
 [0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
 [0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
 [0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 1, 0]]

In [None]:
# def oneHotTrainData(rawData):
#   count = 0
#   oneHotList = [] 
#   for elem in rawData:
#     oneHotList.append(addressToTensor(elem))
  
#   res = np.asarray(oneHotList)
#   return res

# v = np.vectorize(lambda row: addressToTensor(row))
# windowTrain["sector_id"] = v(windowTrain["sector_id"])

# windowTrain["sector_id"] = windowTrain.apply(lambda row: addressToTensor(row["sector_id"]), axis=1)
# windowTrain.head()

New feature for timestamp difference

In [None]:
trainDataSize = trainData["sector_id"].size
print(trainDataSize)

lastSectorEncounter = {}
timeDelta = [0] * trainDataSize

for i in range(trainDataSize):
  sectorId = trainData["sector_id"][i]
  if sectorId in lastSectorEncounter:
    timeDelta[i] = trainData["timestamp"][i] - lastSectorEncounter[sectorId]
  else:
    timeDelta[i] = 0
  lastSectorEncounter[sectorId] = trainData["timestamp"][i]

trainData["time_delta"] = timeDelta
# trainData["timestamp_delta"] = [trainData["timestamp"][i] - lastSectorEncounter if trainData["sector_id"][i] in lastSectorEncounter else  for i in range(trainDataSize)]


9686261


In [None]:
testDataSize = testData["sector_id"].size

lastSectorEncounter = {}
timeDelta = [0] * testDataSize

for i in range(testDataSize):
  sectorId = testData["sector_id"][i]
  if sectorId in lastSectorEncounter:
    timeDelta[i] = testData["timestamp"][i] - lastSectorEncounter[sectorId]
  else:
    timeDelta[i] = 0
  lastSectorEncounter[sectorId] = testData["timestamp"][i]

testData["time_delta"] = timeDelta

In [None]:
trainDeltaMean = trainData["time_delta"].mean()
trainDeltaStd = trainData["time_delta"].std()

trainData["time_delta"] = (trainData["time_delta"] - trainDeltaMean) / trainDeltaStd

testDeltaMean = testData["time_delta"].mean()
testDeltaStd = testData["time_delta"].std()

testData["time_delta"] = (testData["time_delta"] - testDeltaMean) / testDeltaStd

In [None]:
trainData["time_delta"].head()

0   -0.391064
1   -0.391064
2   -0.391064
3   -0.391064
4   -0.391064
Name: time_delta, dtype: float64

In [None]:
windowTrain = trainData[["sector_id","time_delta", "hot/cold"]]
windowTest = testData[["sector_id","time_delta", "hot/cold"]]

In [None]:
BATCH_SIZE = 256
WINDOW_SIZE = 128

In [None]:
class WindowGenerator():
  def __init__(self, input_width, label_width, shift,
               train_df=windowTrain,val_df=None, test_df=windowTest,
               label_columns=None, input_columns=None):
    # Store the raw data.
    self.train_df = train_df
    self.val_df = val_df
    self.test_df = test_df

    # Work out the label and input column indices.
    self.label_columns = label_columns
    self.input_columns = input_columns

    if label_columns is not None:
      self.label_columns_indices = {name: i for i, name in
                                    enumerate(label_columns)}
    if input_columns is not None:
      self.input_columns_indices = {name: i for i, name in
                                    enumerate(input_columns)}

    self.column_indices = {name: i for i, name in
                           enumerate(train_df.columns)}


    # Work out the window parameters.
    self.input_width = input_width
    self.label_width = label_width
    self.shift = shift

    self.total_window_size = input_width + shift

    self.input_slice = slice(0, input_width)
    self.input_indices = np.arange(self.total_window_size)[self.input_slice]

    self.label_start = self.total_window_size - self.label_width
    self.labels_slice = slice(self.label_start, None)
    self.label_indices = np.arange(self.total_window_size)[self.labels_slice]

  def __repr__(self):
    return '\n'.join([
        f'Total window size: {self.total_window_size}',
        f'Input indices: {self.input_indices}',
        f'Label indices: {self.label_indices}',
        f'Label column name(s): {self.label_columns}'])

  def split_window(self, features):
    inputs = features[:, self.input_slice, :]
    labels = features[:, self.labels_slice, :]
    if self.label_columns is not None:
      labels = tf.stack(
          [labels[:, :, self.column_indices[name]] for name in self.label_columns],
          axis=-1)

    if self.input_columns is not None:
      inputs = tf.stack(
          [inputs[:, :, self.column_indices[name]] for name in self.input_columns],
          axis=-1)
  
    # Slicing doesn't preserve static shape information, so set the shapes
    # manually. This way the `tf.data.Datasets` are easier to inspect.
    inputs.set_shape([None, self.input_width, None])
    labels.set_shape([None, self.label_width, None])
  
    return inputs, labels

  def make_dataset(self, data):
    data = np.array(data, dtype=np.float32)
    ds = tf.keras.utils.timeseries_dataset_from_array(
        data=data,
        targets=None,
        sequence_length=self.total_window_size,
        sequence_stride=1,
        shuffle=True,
        batch_size=BATCH_SIZE,
    )

    ds = ds.map(self.split_window)

    return ds
  
  @property
  def train(self):
    return self.make_dataset(self.train_df)
  
  @property
  def val(self):
    if not self.val_df:
      return None
      
    return self.make_dataset(self.val_df)

  @property
  def test(self):
    return self.make_dataset(self.test_df)
  
  @property
  def example(self):
    """Get and cache an example batch of `inputs, labels` for plotting. """
    result = getattr(self, '_example', None)
    if result is None:
      # No example batch was found, so get from the `.train` dataset"""
      result = next(iter(self.train))
      # And cache it for next time
      self._example = result
    return result


In [25]:
# input_width: how far should the model look back
# label_width: length of window to be predicted
# shift: how further into future to be predicted (shift by 1, predicts next value, shift by 0 predicts last input index's value)

# multivariate
# window = WindowGenerator(WINDOW_SIZE, 1, 0, input_columns=["sector_id", "# of blocks"], label_columns=["hot/cold"])

# univariate
window = WindowGenerator(WINDOW_SIZE, 1, 0, input_columns=["sector_id", "time_delta"], label_columns=["hot/cold"])

In [42]:
tf.config.run_functions_eagerly(True)
tf.data.experimental.enable_debug_mode()

In [43]:
testWindow = WindowGenerator(WINDOW_SIZE, 1, 0,train_df=trainData[["sector_id", "hot/cold"]][:10], input_columns=["sector_id"], label_columns=["hot/cold"])


def helper(features, label):
    numpyTensor = features.numpy()
    if tf.executing_eagerly():
        print("eager shit")
    print(numpyTensor)

    return (addressToTensor(features[0].numpy()), label)
encoded = testWindow.train.map(helper)
print(encoded)

AttributeError: in user code:

    File "/tmp/ipykernel_80145/3125151173.py", line 5, in helper  *
        numpyTensor = features.numpy()

    AttributeError: 'Tensor' object has no attribute 'numpy'


In [None]:
# example_window = tf.stack([np.array(windowTrain[:window.total_window_size]),
#                            np.array(windowTrain[100:100+window.total_window_size]),
#                            np.array(windowTrain[200:200+window.total_window_size])])

# example_inputs, example_labels = window.split_window(example_window)

# print('All shapes are: (batch, time, features)')
# print(f'Window shape: {example_window.shape}')
# print(f'Inputs shape: {example_inputs.shape}')
# print(f'Labels shape: {example_labels.shape}')

In [29]:
print(f'Input shape (batch, time, features): {window.example[0].shape}')
print(f'Label shape (batch, time, features): {window.example[0].shape}')

2022-09-05 17:52:25.697530: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-05 17:52:25.838735: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-05 17:52:25.839379: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-05 17:52:25.842100: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

Input shape (batch, time, features): (256, 128, 2)
Label shape (batch, time, features): (256, 128, 2)


In [None]:
  # makeWindow([1,2,3,4,5,6], [2,3,4,5,6,7], windowLength=2, step=2)

In [None]:
# windowedFeatures, windowedLabels = makeWindow(features,target, windowLength=100, step=10)

In [None]:
# print(windowedFeatures[0])
# print(windowedLabels[0])

### Model Implementation

Normalization on features using tf.Normalization

In [None]:
# normalizationLayer = tf.keras.layers.Normalization(axis=None)
# normalizationLayer.adapt(features)

In [None]:
# normalizedTrainInput = normalizationLayer(features)

Dataset of tensorflow,
originally thinking of using it as a window splitter, however, implemented window by myself

In [None]:
# Note: I basically concluded that large sector_ids are not possible to embed(preprocess).
# However, I thought of a way to represent sector_ids in tensors and that is one hot encoding and deep neural networks (aka. Dense, Fully connected)
# It could be wrong and I want more research on 
#   **representing numbers in one-hot encoding and RNN**
#   **Is it possible to feed LSTM large numbers**
# also checkout tf.data.Dataset.grouping_window() and tf.data.Dataset.window() functions 
# i think they can be used to generate windows. gl

# ds = tf.data.Dataset.from_tensor_slices((features, target))
# ds = ds.window(10, shift=1, drop_remainder=True)

In [None]:
# Operations on window
# count = 0

# def to_numpy(ds):
#     return list(ds.as_numpy_iterator())

# for window in ds:
#     if count == 5:
#         break
#     count += 1

In [30]:
maxSectorNumber = np.amax(trainData["sector_id"])
maxBlocks = np.amax(trainData["# of blocks"])
print("maxSectorNumber:", maxSectorNumber)
print("maxBlocks:", maxBlocks)

maxSectorNumber: 1000213824
maxBlocks: 2048


In [31]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


In [None]:
# tensorTrainingData = []

# for i in range(features.size):
#     tensorTrainingData.append(addressToTensor(features[i]))

# tensorTrainingData = tf.Variable(tensorTrainingData)

In [None]:
# tensorTestData = []

# for i in range(10000):
#     tensorTestData.append(addressToTensor(testFeatures[i]))

# tensorTestData = tf.Variable(tensorTestData)

In [None]:
# tensorTrainingData.shape

In [None]:

# embedding the sector number from 0-1
# input_shape=(number of time steps, number of features)
n_steps = WINDOW_SIZE
n_features = 1

embed_input = tf.keras.layers.Input(shape=(n_steps,))
blocks_input = tf.keras.layers.Input(shape=(n_steps,))

emb = tf.keras.layers.Embedding(input_dim=trainLabelSize, output_dim=256, input_length=n_steps)(embed_input)
# emb = tf.keras.layers.Embedding(input_dim=trainLabelSize, output_dim=64, input_length=WINDOW_SIZE)(embed_input)
# emb = tf.keras.layers.BatchNormalization()(emb)

# conv1 = tf.keras.layers.Conv1D(filters=64, kernel_size=3, padding='same')(emb)
# conv1 = tf.keras.layers.Activation('relu')(conv1)
# conv1 = tf.keras.layers.BatchNormalization()(conv1)
# conv1 = tf.keras.layers.MaxPooling1D(pool_size=2)(conv1)

# conv2 = tf.keras.layers.Conv1D(filters=32, kernel_size=3, padding='same')(conv1)
# conv2 = tf.keras.layers.Activation('relu')(conv2)
# conv2 = tf.keras.layers.BatchNormalization()(conv2)
# conv2 = tf.keras.layers.MaxPooling1D(pool_size=2)(conv2)

lstm = tf.keras.layers.LSTM(128, return_sequences=True)(emb)
dropout = tf.keras.layers.Dropout(0.1)(lstm)

flatten = tf.keras.layers.Flatten()(dropout)
conc = tf.keras.layers.Concatenate()([flatten, blocks_input])

# out = tf.keras.layers.Dense(1, activation='sigmoid')(dropout)
timeDistributed = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1, activation='sigmoid'))(conc)
model = tf.keras.Model(inputs=(embed_input,blocks_input), outputs=timeDistributed)

# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Embedding(input_dim=trainLabelSize, output_dim=64, input_length=WINDOW_SIZE))

# model.add(tf.keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu', padding='same'))
# model.add(tf.keras.layers.MaxPooling1D(pool_size=2))

# model.add(tf.keras.layers.Conv1D(filters=32, kernel_size=3, activation='relu', padding='same'))
# model.add(tf.keras.layers.MaxPooling1D(pool_size=2))

# model.add(tf.keras.layers.LSTM(128))
# model.add(tf.keras.layers.Dropout(0.1))
# model.add(tf.keras.layers.Dense(64,activation='relu'))
# model.add(tf.keras.layers.Dropout(0.1))
# model.add(tf.keras.layers.Dense(32,activation='relu'))
# model.add(tf.keras.layers.Flatten())
# model.add(tf.keras.layers.Dense(1,activation='sigmoid'))

model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 128)]             0         
                                                                 
 embedding_1 (Embedding)     (None, 128, 256)          406195200 
                                                                 
 lstm_1 (LSTM)               (None, 128, 128)          197120    
                                                                 
 dropout_1 (Dropout)         (None, 128, 128)          0         
                                                                 
 time_distributed_1 (TimeDis  (None, 128, 1)           129       
 tributed)                                                       
                                                                 
Total params: 406,392,449
Trainable params: 406,392,449
Non-trainable params: 0
_____________________________________________

In [None]:
MAX_EPOCHS = 5
def compile_and_fit(model, window, patience=2, epochs=MAX_EPOCHS):
  early_stopping = tf.keras.callbacks.EarlyStopping('loss', patience=patience, mode='min')

  # model.compile(loss=tf.keras.losses.BinaryCrossentropy(), 
  #               optimizer=tf.keras.optimizers.Adam(clipnorm=0.1))

  model.compile(loss=tf.keras.losses.BinaryCrossentropy(), 
                optimizer=tf.keras.optimizers.Adam(), metrics=['accuracy'])
  
  def converter(features, labels):
    '''used for multivariate input'''
    return (features[:,:,0:1], features[:,:,1:2]), labels

  # trainDataset = window.train.map(converter)

  history = model.fit(window.train, epochs=epochs, 
                      callbacks=[early_stopping])

  return history
  

In [None]:
history = compile_and_fit(model, window)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


### Save Model

In [None]:
!sudo pip install h5py

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# Saving model to JSON file
model_json = model.to_json()
with open("drive/MyDrive/grad/models/functional/CNNLSTM10E.json", "w") as json_file:
  json_file.write(model_json)

# serialize weights to HDF5
model.save_weights("drive/MyDrive/grad/models/functional/CNNLSTM10E.h5")
print("Model saved")

Model saved


### Load model and fit


In [None]:
from tensorflow.keras.models import model_from_json
jsonFile = open("drive/MyDrive/grad/models/functional/CNNLSTM10E.json", 'r')
loadedJsonModel = jsonFile.read()
jsonFile.close()

loadedModel = model_from_json(loadedJsonModel)
# load weights
loadedModel.load_weights("drive/MyDrive/grad/models/functional/CNNLSTM10E.h5")

loadedModel.summary()


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 128)]             0         
                                                                 
 embedding (Embedding)       (None, 128, 256)          406195200 
                                                                 
 lstm (LSTM)                 (None, 128, 128)          197120    
                                                                 
 dropout (Dropout)           (None, 128, 128)          0         
                                                                 
 time_distributed (TimeDistr  (None, 128, 1)           129       
 ibuted)                                                         
                                                                 
Total params: 406,392,449
Trainable params: 406,392,449
Non-trainable params: 0
_______________________________________________

In [None]:
history = compile_and_fit(loadedModel, window)

Epoch 1/5
Epoch 2/5
 6615/37837 [====>.........................] - ETA: 1:05:03 - loss: 0.6730 - accuracy: 0.5750

Save loaded model

In [None]:
# Saving model to JSON file
model_json = loadedModel.to_json()
with open("drive/MyDrive/grad/models/functional/CNNLSTM10E.json", "w") as json_file:
  json_file.write(model_json)

# serialize weights to HDF5
loadedModel.save_weights("drive/MyDrive/grad/models/functional/CNNLSTM10E.h5")

In [None]:
# Stacked LSTM
# reduced output dimension of embedding layer (experimenting)
# deleted gradient clipping (gradient clipping could've been unnecessary)
# running on two epochs

# hyperparameters for experiment:
#   - window size
#   - learning_rate
#   - dropout rate

In [None]:
!nvidia-smi
!pip show tensorflow
!nvcc --version

In [None]:
history.history

In [None]:
# def converter(features, labels):
#   return (features[:,:,0:1], features[:,:,1:2]), labels

# testDataset = window.test.map(converter)
results = model.evaluate(window.test)
print(results)

In [None]:
def converter(features, labels):
  return (features[:,:,0:1], features[:,:,1:2]), labels

testDataset = window.test.map(converter)
predictions = model.predict(window.test)


In [None]:
print(predictions.shape)

In [None]:
idx = 0
print(predictions[idx])
print(testData["hot/cold"][idx+WINDOW_SIZE -1])

In [None]:
correct = 0
pSize = predictions.size
numberOfOnes = 0
numberOfZeroes = 0
wrong = 0

for i in range(pSize):
  if predictions[i][0] >= 0.5 and testData["hot/cold"][i + WINDOW_SIZE - 1] == 1:
    correct += 1
    numberOfOnes += 1
  elif predictions[i][0] < 0.5 and testData["hot/cold"][i + WINDOW_SIZE - 1] == 0:
    correct += 1
    numberOfZeroes += 1
  else:
    wrong += 1

print("Accuracy: ", correct / pSize)
print("Ones: ", numberOfOnes)
print("Zeroes: ", numberOfZeroes)
print("Wrong: ", wrong)

In [None]:
mean = predictions.mean()
print("mean: ", mean)

In [None]:
!mkdir -p drive/MyDrive/grad/models/functional
model.save('drive/MyDrive/grad/models/functional/cnnlstm_uni_batchnorm')

Plotting

In [None]:
predictions.size

In [None]:
# the histogram of the data
n, bins, patches = plt.hist(predictions, 150, density=True, facecolor='g', alpha=0.75)
plt.xlabel('Probability')
plt.ylabel('Density')
plt.title('Histogram of model prediction')
plt.xlim(0, 1)
plt.grid(True)
plt.show()

In [None]:
  #Optimizer 
# loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
# opt = tf.keras.optimizers.Adam(learning_rate=0.001)
# grads_and_vars = opt.compute_gradients(loss_fn)
# model.compile(optimizer=opt, loss="mse")
# model.summary()

In [None]:
#One hot encoded shit
# train_x = np.asarray(tensorTrainingData)
# test_x = np.asarray(tensorTestData)

In [None]:
# print(len(windowedFeatures))
# print(len(target))

# windowedFeatures.shape

In [None]:
# model.fit(windowedFeatures[:10000], windowedLabels[:10000], epochs=1, verbose=1)

In [None]:
# y_hat = model.predict(test_x, verbose=1)
# y_hat.shape

In [None]:
# correct = 0
# for i in range(10000):
#     if y_hat[i] == testTarget[i]:
#         correct += 1

# print(f"Accuracy: {correct/10000}")