In [5]:
!pip install tensorflow_addons

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow_addons
  Downloading tensorflow_addons-0.20.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (591 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m591.0/591.0 kB[0m [31m43.0 MB/s[0m eta [36m0:00:00[0m
Collecting typeguard<3.0.0,>=2.7 (from tensorflow_addons)
  Downloading typeguard-2.13.3-py3-none-any.whl (17 kB)
Installing collected packages: typeguard, tensorflow_addons
Successfully installed tensorflow_addons-0.20.0 typeguard-2.13.3


In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns

from sklearn.preprocessing import LabelBinarizer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

import tensorflow as tf
import tensorflow_addons as tfa

from datetime import date


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



In [7]:
# Read both splits; each CSV carries its own row-id column, which becomes the index.
train = pd.read_csv("training_set.csv", index_col="train_idx")
test = pd.read_csv("testing_set.csv", index_col="test_idx")

# Parse the timestamp column of both frames.
for frame in (train, test):
    frame['Time'] = pd.to_datetime(frame['Time'])

# Training rows only: discard incomplete rows, then keep the rows whose four
# price columns all lie inside [1, 2] (Series.between is inclusive on both ends).
train = train.dropna()
prices_in_range = (
    train['Open'].between(1, 2)
    & train['High'].between(1, 2)
    & train['Low'].between(1, 2)
    & train['Close'].between(1, 2)
)
train = train[prices_in_range].copy()

# reset_index() without drop=True keeps the old train_idx index as a regular column.
train = train.reset_index()

# 'media' is the arithmetic mean of the four price columns, added to both frames.
for frame in (train, test):
    frame['media'] = (frame['Open'] + frame['High'] + frame['Low'] + frame['Close']) / 4

In [8]:
# Inspect the cleaned training frame (displayed as the cell's last expression).
train

Unnamed: 0,train_idx,Time,Open,High,Low,Close,Volume,label,media
0,0,2007-03-07,1.31258,1.31844,1.31086,1.31648,1807377.0,0,1.314590
1,1,2007-03-08,1.31667,1.31813,1.31154,1.31396,1995920.0,1,1.315075
2,2,2007-03-09,1.31396,1.31583,1.30864,1.31118,1859100.0,1,1.312402
3,3,2007-03-11,1.31236,1.31415,1.31068,1.31175,192381.0,1,1.312235
4,5,2007-03-13,1.31811,1.32186,1.31547,1.31927,2003765.0,1,1.318677
...,...,...,...,...,...,...,...,...,...
3241,3504,2018-05-18,1.18053,1.18221,1.17498,1.17689,220609.0,1,1.178653
3242,3505,2018-05-20,1.17684,1.17756,1.17441,1.17474,18855.0,0,1.175887
3243,3506,2018-05-21,1.17473,1.17973,1.17165,1.17913,227861.0,0,1.176310
3244,3507,2018-05-22,1.17914,1.18296,1.17567,1.17824,255729.0,0,1.179002


In [9]:
# Extract the model input (mean price) and the target as NumPy arrays.
train_features = np.array(train['media'])
train_labels = np.array(train['label'])

# Ordered (non-shuffled) hold-out: the first 75% of the rows are used for
# fitting, the remaining 25% form the validation set.
split_time = int(len(train_labels) * 0.75)
x_train, x_valid = train_features[:split_time], train_features[split_time:]
y_train, y_valid = train_labels[:split_time], train_labels[split_time:]

## Prepare Features and Labels

In [10]:
# Hyper-parameters of the windowed tf.data pipeline
shuffle_buffer_size = 1000  # buffer size handed to Dataset.shuffle
batch_size = 32             # number of windows per batch
window_size = 20            # feature values per sample (the training cell reassigns this)

In [11]:
# Pair every feature value with its label: column 0 = mean price, column 1 = label.
# np.column_stack builds the (n, 2) array in a single vectorised call instead of
# appending row by row; the integer labels are promoted to float exactly as in a
# mixed-type np.array, and the result stays 2-D even when there are no rows.
data = np.column_stack((x_train, y_train))
data

array([[1.31459  , 0.       ],
       [1.315075 , 1.       ],
       [1.3124025, 1.       ],
       ...,
       [1.1073325, 0.       ],
       [1.1021125, 1.       ],
       [1.0945825, 0.       ]])

In [12]:
def windowed_dataset(data, window_size, batch_size, shuffle_buffer):
    """Turn a two-column array into shuffled, batched (features, label) samples.

    Args:
        data: 2-D array-like; the body reads column 0 as the feature values
            and column 1 as the labels.
        window_size: number of consecutive rows used as features per sample.
        batch_size: number of samples per batch.
        shuffle_buffer: buffer size passed to ``Dataset.shuffle``.

    Returns:
        A ``tf.data.Dataset`` of ``(features, label)`` batches. For each window
        of ``window_size + 1`` consecutive rows, the features are column 0 of
        the first ``window_size`` rows and the label is column 1 of the last row.
    """
    span = window_size + 1  # feature rows plus the one row that supplies the label

    def to_tensor(window):
        # Each window is itself a dataset; batching all of it yields one tensor.
        return window.batch(span)

    def split_features_label(rows):
        # Build the (features, label) tuple for one window
        return rows[:-1, 0], rows[-1, 1]

    return (
        tf.data.Dataset.from_tensor_slices(data)
        .window(span, shift=1, drop_remainder=True)
        .flat_map(to_tensor)
        .map(split_features_label)
        # Shuffle the windows, then group them into batches
        .shuffle(shuffle_buffer)
        .batch(batch_size)
        .prefetch(1)
    )


In [16]:
# Override the default window length for this experiment
window_size = 2
print(f'Probando con una window de {window_size}')
# Generate the dataset windows
dataset = windowed_dataset(data, window_size, batch_size, shuffle_buffer_size)

# Reset states generated by Keras
tf.keras.backend.clear_session()

# Build the model
model = tf.keras.models.Sequential([
  # Add a trailing feature axis: (batch, window_size) -> (batch, window_size, 1)
  tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=-1),
                      input_shape=[window_size]),
  tf.keras.layers.SimpleRNN(40, return_sequences=True),
  tf.keras.layers.SimpleRNN(40),
  # The 'binary_crossentropy' loss (from_logits=False by default) and the 0.5
  # threshold of the F1 metric both expect probabilities, so the output unit
  # needs a sigmoid; a linear Dense(1) would feed them unbounded logits.
  tf.keras.layers.Dense(1, activation='sigmoid'),
])

model.compile(loss='binary_crossentropy',optimizer='adam',metrics=[tfa.metrics.F1Score(num_classes=1, average='macro',threshold=0.5)])

# Train the model
history = model.fit(dataset,epochs=100)

# Best training F1 score reached over all epochs
print(np.max(history.history['f1_score']))

Probando con una window de 2
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch

## Model Prediction

The helper below slices a series into sliding windows, batches them, and runs the model over them. It is then used to generate predictions that align with the rows of the test set.

In [17]:
def model_forecast(model, series, window_size, batch_size):
    """Run ``model`` over every sliding window of ``series``.

    Args:
        model: object exposing ``predict``, which is called with a
            ``tf.data.Dataset`` of batched windows.
        series: values to slice into windows.
        window_size: length of each window.
        batch_size: number of windows per prediction batch.

    Returns:
        Whatever ``model.predict`` returns for the batched windows.
    """
    windows = (
        tf.data.Dataset.from_tensor_slices(series)
        # Consecutive windows one step apart; incomplete trailing windows are dropped
        .window(window_size, shift=1, drop_remainder=True)
        # Collapse each nested window dataset into a single tensor
        .flat_map(lambda win: win.batch(window_size))
        # Group the windows into batches
        .batch(batch_size)
        .prefetch(1)
    )

    # Predict on the whole windowed dataset
    return model.predict(windows)

In [21]:
# Series to forecast from: the last `window_size` training values seed the first
# window, followed by the test values. The final test value is left out because
# a window ending on it would predict a row beyond the end of the test set;
# without the trim the model emits len(test) + 1 predictions.
forecast_series = np.concatenate((train_features[-window_size:], np.array(test['media'])[:-1]))

# Use helper function to generate predictions
forecast = model_forecast(model, forecast_series, window_size, batch_size)

# Drop only the trailing unit axis so that a single prediction still yields a 1-D array
results = forecast.squeeze(axis=-1)

# Exactly one prediction per test row, in test order
assert len(results) == len(test), "predictions are not aligned with the test rows"

# Binarise the model outputs at 0.5
pred = np.where(results > 0.5, 1, 0)



In [24]:
import json

# Map each test index label to its prediction. `pred` is positional (one entry
# per test row, in order), so labels and predictions are paired with zip rather
# than using the label as a position into `pred`. tolist() yields plain Python
# scalars, which json can serialise as keys.
diccionario = {idx: int(p) for idx, p in zip(test.index.tolist(), pred)}

# Data to be written
json_entrega = {
    "target": diccionario
}
# Serializing json
json_object = json.dumps(json_entrega, indent=4)

# Writing to entrega.json
with open("entrega.json", "w") as outfile:
    outfile.write(json_object)

In [26]:
# Count how many training rows carry each label value (class balance check).
train['label'].value_counts()

0    1632
1    1614
Name: label, dtype: int64