In [None]:
!pip install -q kaggle

In [None]:
from google.colab import files

# Upload kaggle.json (the Kaggle API token) through the Colab file picker.
# The result is bound to a name instead of being left as the cell's last
# expression: upload() returns the uploaded files' contents, which would
# otherwise be echoed into the saved notebook output and expose the token.
uploaded = files.upload()

In [None]:
# Copy kaggle.json from the working directory into ~/.kaggle (created if missing).
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
# Mode 600: only the owner may read or write the token file.
!chmod 600 ~/.kaggle/kaggle.json
# List the directory to confirm the file is in place.
!ls ~/.kaggle

kaggle.json


In [None]:
# Download the icarofreire/dollar-prices-and-infos dataset archive with the Kaggle CLI.
!kaggle datasets download -d icarofreire/dollar-prices-and-infos

Downloading dollar-prices-and-infos.zip to /content
  0% 0.00/1.29M [00:00<?, ?B/s]
100% 1.29M/1.29M [00:00<00:00, 86.8MB/s]


In [None]:
!mkdir dollar-prices-and-infos.zip
!unzip dollar-prices-and-infos.zip -d dollar-prices-and-infos
!ls dollar-prices-and-infos

mkdir: cannot create directory ‘dollar-prices-and-infos.zip’: File exists
Archive:  dollar-prices-and-infos.zip
  inflating: dollar-prices-and-infos/database_15min.csv  
database_15min.csv


In [None]:
import pandas as pd
import numpy as np 
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split

In [None]:
# Load the dataset CSV and replace its header with explicit column names.
column_names = ['Date', 'Open', 'Max', 'Min', 'Close', 'Volume', 'fin', 'negociation', 'mme13', 'mme72', 'high_mean', 'low_mean', 'diffmacd', 'deamacd', 'macdlh', 'difflh', 'dealh', 'Result']

# set_axis(..., inplace=True) is deprecated in pandas 1.5 and removed in 2.0,
# so use the frame it returns instead of mutating in place.
df = pd.read_csv('/content/dollar-prices-and-infos/database_15min.csv').set_axis(column_names, axis=1)

df.head(5)

Unnamed: 0,Date,Open,Max,Min,Close,Volume,fin,negociation,mme13,mme72,high_mean,low_mean,diffmacd,deamacd,macdlh,difflh,dealh,Result
0,21-11-19 09:04,4206.5,4206.5,4205.0,4205.5,4451,187200190.0,1175,4202.1,4202.5,4202.9,4201.3,-0.304,-0.136,0.459,-0.09,-0.32,4201.0
1,21-11-19 09:05,4205.5,4206.5,4203.0,4204.5,6515,273933345.0,1627,4202.5,4202.5,4203.1,4201.4,-0.099,-0.134,0.679,0.105,-0.235,4199.0
2,21-11-19 09:06,4204.5,4204.5,4203.0,4203.5,2751,115640535.0,801,4202.6,4202.5,4203.1,4201.4,-0.005,-0.127,0.658,0.176,-0.153,4201.0
3,21-11-19 09:07,4203.0,4204.0,4202.0,4202.5,3360,141218695.0,1038,4202.6,4202.5,4203.1,4201.4,-0.007,-0.12,0.486,0.151,-0.092,4207.0
4,21-11-19 09:08,4202.0,4203.0,4201.5,4202.0,2545,106941715.0,900,4202.5,4202.5,4203.1,4201.4,-0.051,-0.116,0.29,0.089,-0.056,4211.0


In [None]:
# (rows, columns) of the loaded frame.
df.shape

(33077, 18)

In [None]:
# Per-column dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 33077 entries, 0 to 33076
Data columns (total 18 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Date         33077 non-null  object 
 1   Open         33077 non-null  float64
 2   Max          33077 non-null  float64
 3   Min          33077 non-null  float64
 4   Close        33077 non-null  float64
 5   Volume       33077 non-null  int64  
 6   fin          33077 non-null  float64
 7   negociation  33077 non-null  int64  
 8   mme13        33077 non-null  float64
 9   mme72        33077 non-null  float64
 10  high_mean    33077 non-null  float64
 11  low_mean     33077 non-null  float64
 12  diffmacd     33077 non-null  float64
 13  deamacd      33077 non-null  float64
 14  macdlh       33077 non-null  float64
 15  difflh       33077 non-null  float64
 16  dealh        33077 non-null  float64
 17  Result       33077 non-null  float64
dtypes: float64(15), int64(2), object(1)
memory usa

In [None]:
# Number of missing values in each column.
df.isna().sum()

Date           0
Open           0
Max            0
Min            0
Close          0
Volume         0
fin            0
negociation    0
mme13          0
mme72          0
high_mean      0
low_mean       0
diffmacd       0
deamacd        0
macdlh         0
difflh         0
dealh          0
Result         0
dtype: int64

In [None]:
# Scale the open price with a min-max scaler.
# The scaler is fitted on the first 80% of rows only -- the same leading
# portion that split_dataset() assigns to training -- so the min/max of the
# held-out test rows do not leak into the training data. Test-row values may
# therefore fall outside [0, 1].
# MinMaxScaler is already imported in the imports cell.
open_prices = df[['Open']].values  # double brackets keep it 2-D: (n_rows, 1)
train_rows = int(len(open_prices) * 0.8)  # must match split_dataset's fraction
sc = MinMaxScaler(feature_range=(0,1))
sc.fit(open_prices[:train_rows])
sc_open = sc.transform(open_prices)
print (sc_open)
print (sc_open.shape)

[[0.13338891]
 [0.13255523]
 [0.13172155]
 ...
 [0.95956649]
 [0.95956649]
 [0.95956649]]
(33077, 1)


In [None]:
def split_dataset(sc_data, train_fraction=0.8):
    """Split a 2-D array by row order into a leading train part and a trailing test part.

    Args:
        sc_data: 2-D array supporting NumPy-style ``[rows, cols]`` slicing.
        train_fraction: Share of rows, counted from the start, that goes to
            the training part. Defaults to 0.8 (80% train, 20% test).

    Returns:
        Tuple ``(train_size, test_size, train_data, test_data)`` where the
        sizes are row counts and both data parts keep every column.

    Raises:
        ValueError: If ``train_fraction`` is not within [0, 1].
    """
    if not 0 <= train_fraction <= 1:
        raise ValueError("train_fraction must be between 0 and 1")
    train_size = int(len(sc_data) * train_fraction)
    test_size = len(sc_data) - train_size
    # Slice both parts the same way; the test part previously kept only the
    # first column while the train part kept all of them.
    train_data = sc_data[:train_size, :]
    test_data = sc_data[train_size:, :]
    return train_size, test_size, train_data, test_data

In [None]:
# Split the scaled open-price series into its train and test parts.
train_size, test_size, train_data, test_data = split_dataset(sc_open)

In [None]:
# Shape of the training part.
train_data.shape

(26461, 1)

In [None]:
# Shape of the test part.
test_data.shape

(6616, 1)

In [None]:
def create_dataset(data, timestamp=1):
    """Turn a series into sliding windows and the value that follows each window.

    Args:
        data: 2-D NumPy array; only column 0 is used.
        timestamp: Window length, i.e. how many consecutive values form one
            input sample.

    Returns:
        Tuple ``(x, y)``: ``x`` has shape ``(n_samples, timestamp)`` and
        ``y`` has shape ``(n_samples,)``, where ``y[i]`` is the value right
        after window ``x[i]`` and ``n_samples = max(len(data) - timestamp, 0)``.
    """
    # Every start index whose window still has a following value is valid;
    # the former `- 1` in the range bound dropped the last such window.
    n_samples = max(len(data) - timestamp, 0)
    x_dataset = [data[i:i + timestamp, 0] for i in range(n_samples)]
    y_dataset = [data[i + timestamp, 0] for i in range(n_samples)]
    # The reshape keeps x two-dimensional even when no window fits.
    return np.array(x_dataset).reshape(n_samples, timestamp), np.array(y_dataset)

In [None]:
# Window length: each input sample holds 100 consecutive scaled open prices.
timestamp = 100
# Build (window, following value) pairs separately for the train and test parts.
x_train, y_train = create_dataset(train_data, timestamp)
x_test, y_test = create_dataset(test_data, timestamp)

In [None]:
# Shapes of the training windows and their targets.
x_train.shape, y_train.shape

((26360, 100), (26360,))

In [None]:
# Shapes of the test windows and their targets.
x_test.shape, y_test.shape

((6515, 100), (6515,))

In [None]:
def reshape_data(x_train, x_test):
    """Append a trailing axis of size 1 to both window arrays.

    Each array is reshaped from (samples, steps) to (samples, steps, 1),
    the 3-D layout passed to the LSTM layers.

    Returns:
        Tuple ``(x_train, x_test)`` of the reshaped arrays.
    """
    def _with_feature_axis(windows):
        n_samples, n_steps = windows.shape[0], windows.shape[1]
        return windows.reshape(n_samples, n_steps, 1)

    train_3d = _with_feature_axis(x_train)
    test_3d = _with_feature_axis(x_test)
    return train_3d, test_3d

In [None]:
# Give the open-price windows the trailing size-1 axis: (samples, steps, 1).
x_train, x_test = reshape_data(x_train, x_test)

In [None]:
def lstm_model(input_shape=(100, 1), units=50, dropout_rate=0.2):
    """Build an uncompiled stack of three LSTM layers with a single-unit output.

    Args:
        input_shape: ``(steps, features)`` of one input sample. The default
            ``(100, 1)`` matches windows of 100 values of a single feature;
            pass a different shape when the window length changes.
        units: Number of units in each LSTM layer.
        dropout_rate: Dropout rate applied after each LSTM layer.

    Returns:
        The Keras ``Sequential`` model; the caller is expected to compile it.
    """
    model = Sequential()
    # The first two LSTM layers return full sequences so the next LSTM
    # receives 3-D input; the last one returns only its final output.
    model.add(LSTM(units=units, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(dropout_rate))
    model.add(LSTM(units=units, return_sequences=True))
    model.add(Dropout(dropout_rate))
    model.add(LSTM(units=units))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1))
    return model

In [None]:
# Build the network and compile it with SGD + momentum, Huber loss and MAE as the metric.
model = lstm_model()
# `lr` is a deprecated alias that triggers a warning; `learning_rate` is the
# supported argument name.
optimizer = tf.keras.optimizers.SGD(learning_rate=1.0000e-04, momentum=0.9)
model.compile(loss=tf.keras.losses.Huber(),
              optimizer=optimizer,
              metrics=["mae"])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 100, 50)           10400     
_________________________________________________________________
dropout (Dropout)            (None, 100, 50)           0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 100, 50)           20200     
_________________________________________________________________
dropout_1 (Dropout)          (None, 100, 50)           0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 50)                20200     
_________________________________________________________________
dropout_2 (Dropout)          (None, 50)                0         
_________________________________________________________________
dense (Dense)                (None, 1)                 5

  "The `lr` argument is deprecated, use `learning_rate` instead.")


In [None]:
# Train for 10 epochs. Keep the returned History object so the per-epoch
# loss/MAE can be inspected or plotted later, instead of echoing its repr.
history = model.fit(x_train, y_train, epochs = 10, batch_size = 64)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fe9f2144610>