In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
from torch import nn, optim
from torchvision import transforms
from torchvision.models import resnet50

In [3]:
def normalize(data, end):
    """Return a scaled copy of an OHLCV window; the input frame is not modified.

    ``Volume`` is divided by its maximum over the whole frame. The four price
    columns are shifted by the minimum ``Low`` of the first ``end`` rows and
    then divided by the largest shifted ``High`` of those same rows, so rows
    at position ``end`` and later can fall outside the [0, 1] range.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with ``Open``, ``High``, ``Low``, ``Close`` and ``Volume`` columns.
    end : int
        Number of leading rows (positional) that define the price scale.

    Returns
    -------
    pandas.DataFrame
        Normalized copy of ``data``.
    """
    price_columns = ['Open', 'High', 'Low', 'Close']
    scaled = data.copy()
    scaled['Volume'] = scaled['Volume'] / scaled['Volume'].max()

    # Price statistics are taken from the leading ``end`` rows only.
    reference = scaled.iloc[:end]
    floor = reference['Low'].min()
    # Subtraction is monotonic, so this equals the maximum of the shifted highs.
    span = reference['High'].max() - floor

    for column in price_columns:
        scaled[column] = (scaled[column] - floor) / span
    return scaled

In [4]:
# split a multivariate sequence into samples
def split_sequences(sequences, n_steps_in, n_steps_out):
    """Slice a frame into normalized (input window, target) samples.

    Each sample covers ``n_steps_in + n_steps_out - 1`` consecutive rows. The
    window is normalized with ``normalize`` using only its first
    ``n_steps_in`` rows as the scale reference. The input part is the first
    ``n_steps_in`` rows (all columns); the target is the normalized ``Close``
    from the last input row to the end of the window (``n_steps_out`` values),
    so the first target value repeats the last input row's ``Close``.

    Parameters
    ----------
    sequences : pandas.DataFrame
        Source rows; must provide the columns ``normalize`` expects.
    n_steps_in : int
        Rows per input window.
    n_steps_out : int
        Values per target.

    Returns
    -------
    tuple[list, list]
        ``X`` (nested lists, one ``n_steps_in`` x n_columns block per sample)
        and ``y`` (one list of ``n_steps_out`` values per sample).
    """
    X, y = [], []
    window_len = n_steps_in + n_steps_out - 1
    # One sample per start position that still leaves room for a full window.
    for start in range(len(sequences) - window_len + 1):
        window = sequences.iloc[start:start + window_len]
        # `normalize` slices positionally *inside* the window, so the reference
        # length is n_steps_in. Passing the absolute index (start + n_steps_in)
        # let the scale statistics include the target rows for start > 0.
        window_norm = normalize(window, n_steps_in)
        # gather input and output parts of the pattern
        seq_x = window_norm.iloc[:n_steps_in]
        seq_y = window_norm.iloc[n_steps_in - 1:]['Close']
        X.append(seq_x.values.tolist())
        y.append(seq_y.values.tolist())
    return X, y

In [5]:
# Load the AAPL candle export (the file name indicates 1-minute bid bars) and
# keep only rows with non-zero volume.
df = pd.read_csv('./dukascopy/AAPL.USUSD_Candlestick_1_M_BID_01.01.2021-26.02.2021.csv')
# .copy() gives an independent frame, so the column assignment below does not
# write into a filtered slice (avoids SettingWithCopyWarning).
df = df[df['Volume'] > 0].copy()
# Timestamps are day-first, e.g. '07.01.2021 22:40:00.000 GMT-0800'; spell the
# format out so pandas cannot guess month-first for days <= 12.
df['Local time'] = pd.to_datetime(df['Local time'], format='%d.%m.%Y %H:%M:%S.%f GMT%z')

In [8]:
# NOTE(review): `a` is not defined in any cell visible here and the recorded
# output is a TypeError; this looks like leftover scratch work — confirm and remove.
a.to_timestamp()

TypeError: unsupported Type Int64Index

In [69]:
# Encode each bar's time of day as fractional hours, shifted by -6 h and -0.5 h
# (presumably to centre the trading session around zero — confirm the offsets).
steps = [
    (clock.hour - 6) + clock.minute/60 - 0.5
    for clock in (stamp.time() for stamp in df['Local time'])
]
df['step'] = steps

In [70]:
# Remove the timestamp column in place, leaving the remaining columns for the
# model. Not idempotent: re-running this cell raises KeyError.
del df['Local time']

In [147]:
# Window geometry: 60 input rows and 5 target values per sample.
n_steps_in, n_steps_out = 60, 5
X, y = split_sequences(sequences=df, n_steps_in=n_steps_in, n_steps_out=n_steps_out)

In [155]:
# Group the samples in X into blocks: for every block index i (highest first,
# stopping after i = 2) collect `sample_size` slices of X, each shifted by one
# more position j, ordered from j = sample_size - 1 down to j = 0.
# NOTE(review): for the largest i the slice end (i+1)*sample_size + j runs past
# len(X), so those slices come back shorter than sample_size — confirm whether
# the upper bound (and the skipped i = 0, 1) is intended.
sample_size = 60
temp = []
for i in reversed(range(2, int(len(X) / sample_size) + 1)):
    temp2 = [
        X[(i*sample_size) + j:(i+1)*sample_size + j]
        for j in reversed(range(sample_size))
    ]
    temp.append(temp2)

In [159]:
# Shape check on the grouped blocks. NOTE(review): the recorded output is only
# 2-D, which suggests the inner slices have unequal lengths — verify the slicing.
np.array(temp).shape

  """Entry point for launching an IPython kernel.


(242, 60)

In [165]:
# Shape of `temp2` as left by the final iteration of the grouping loop.
np.array(temp2).shape

(60, 60, 60, 6)

In [166]:
# Array shapes of the inputs X and the targets y side by side.
np.array(X).shape, np.array(y).shape

((14593, 60, 6), (14593, 5))

In [77]:
# X comes back from split_sequences as nested Python lists, which have no
# `.shape`; convert first. Axis 2 is the per-row column count.
n_features = np.asarray(X).shape[2]

In [78]:
# Hold out the last 20% of samples (in row order, no shuffling) as the test set.
X_test_size = int(len(X)*0.2)
y_test_size = int(len(y)*0.2)
# Slice from an explicit start index: with a test size of 0, `seq[:-0]` would be
# empty and `seq[-0:]` would be the whole sequence.
X_split = len(X) - X_test_size
y_split = len(y) - y_test_size
X_train, X_test = X[:X_split], X[X_split:]
y_train, y_test = y[:y_split], y[y_split:]

In [79]:
# Convert all four splits to float tensors in one pass.
X_train, X_test, y_train, y_test = (
    torch.tensor(split).float()
    for split in (X_train, X_test, y_train, y_test)
)
# Move every tensor to the GPU when one is available.
if torch.cuda.is_available():
    X_train, X_test, y_train, y_test = (
        tensor.cuda().float()
        for tensor in (X_train, X_test, y_train, y_test)
    )

In [80]:
# Confirm the training tensor's dimensions before defining the model.
X_train.shape

torch.Size([11675, 60, 6])

In [135]:
class NeuralNet(nn.Module):
    """Small 1-D convolutional regressor producing one value per output step.

    The input is ``[batch, steps_in, features]``. ``Conv1d`` treats the
    ``steps_in`` axis as channels and uses a kernel as wide as ``features``,
    so the convolution collapses the feature axis to length 1.

    Parameters
    ----------
    steps_in, steps_out, features : int, optional
        Layer dimensions. Each falls back to the notebook-level
        ``n_steps_in`` / ``n_steps_out`` / ``n_features`` when omitted, so
        ``NeuralNet()`` keeps working as before.
    """

    def __init__(self, steps_in=None, steps_out=None, features=None):
        super().__init__()
        steps_in = n_steps_in if steps_in is None else steps_in
        steps_out = n_steps_out if steps_out is None else steps_out
        features = n_features if features is None else features

        self.conv = nn.Conv1d(steps_in, steps_out, kernel_size=features)
        self.pool = nn.MaxPool1d(1)  # kernel size 1: leaves the tensor unchanged
        self.hidden = nn.Linear(steps_out, 50)
        # One output per target step; the previous hard-coded 10 did not match
        # targets with `steps_out` values per sample.
        self.out = nn.Linear(50, steps_out)
        self.act = nn.ReLU()

    def forward(self, x):
        """Map ``[batch, steps_in, features]`` to ``[batch, steps_out]``."""
        x = self.act(self.conv(x))    # [batch, steps_out, 1]
        x = self.pool(x)              # [batch, steps_out, 1]
        x = x.view(x.size(0), -1)     # [batch, steps_out]
        x = self.act(self.hidden(x))  # [batch, 50]
        x = self.out(x)               # [batch, steps_out]
        return x

In [143]:
# NOTE(review): ResNet-50's first layer expects 4-D image batches; the traceback
# recorded further down in this notebook shows it rejecting a [128, 60, 6] input.
# Run top to bottom, this assignment is replaced by the next cell — confirm
# whether it should be kept.
model = resnet50(pretrained=False, num_classes=n_steps_out)

In [136]:
# Instantiate the custom network; this rebinds `model`, replacing any earlier one.
model = NeuralNet()

In [144]:
# Mean-squared-error objective, averaged over all elements.
loss_fn = nn.MSELoss(reduction='mean')

# Put the model and the loss module on the GPU when one is available.
if torch.cuda.is_available():
    model, loss_fn = model.cuda(), loss_fn.cuda()

In [138]:
# Adam over every parameter of the current model, with a step size of 1e-6.
lr = 1e-6
optimizer = optim.Adam(params=model.parameters(), lr=lr)

In [145]:
# Mini-batch training: 101 passes over X_train in fixed-order batches of 128.
batch_size = 128
n_batches = len(X_train) // batch_size  # a trailing partial batch is skipped
model.train()
for t in range(101):
    running_loss = 0
    for i in range(n_batches):
        batch = slice(i*batch_size, (i+1)*batch_size)

        # Reset gradients left over from the previous step; without this,
        # backward() keeps adding to them and every update uses a running sum.
        optimizer.zero_grad()

        # Forward pass: compute predicted y by passing x to the model.
        y_pred = model(X_train[batch])

        # Compute the loss, backpropagate and update the weights.
        loss = loss_fn(y_pred, y_train[batch])
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Report the summed batch loss on the first and the last pass.
    if t % 100 == 0:
        print(t, running_loss)

RuntimeError: Expected 4-dimensional input for 4-dimensional weight [64, 3, 7, 7], but got 3-dimensional input of size [128, 60, 6] instead

In [116]:
# Inference only: use eval mode and skip autograd bookkeeping so no graph is
# built for the whole test set, then restore the model's previous mode.
was_training = model.training
model.eval()
with torch.no_grad():
    y_pred = model(X_test)
model.train(was_training)

In [117]:
# Stop at the first prediction whose values rise strictly from one step to the next.
for pred in y_pred:
    pred_l = pred.tolist()
    if all(nxt > cur for cur, nxt in zip(pred_l, pred_l[1:])):
        print('here')
        break

In [118]:
# Compare the test input shape with the prediction shape.
X_test.shape, y_pred.shape

(torch.Size([2918, 60, 6]), torch.Size([2918, 5]))

In [169]:
# Re-read the raw candle file; this rebinds `df`, discarding the filtered frame
# and the derived `step` column built earlier.
df = pd.read_csv('./dukascopy/AAPL.USUSD_Candlestick_1_M_BID_01.01.2021-26.02.2021.csv')

In [173]:
# Look at the raw timestamp string of the row at position 10000.
df.iloc[10000]['Local time']

'07.01.2021 22:40:00.000 GMT-0800'

In [174]:
# Timestamps in this file are day-first (e.g. '07.01.2021 22:40:00.000 GMT-0800'),
# so give the format explicitly instead of letting pandas guess month-first.
dates = pd.to_datetime(df['Local time'], format='%d.%m.%Y %H:%M:%S.%f GMT%z')

In [175]:
# Take the parsed timestamp stored under index label 0.
d = dates[0]

In [178]:
# Calendar date of `d` as an ISO 'YYYY-MM-DD' string.
d.date().isoformat()

'2021-01-01'

In [180]:
# Raw (unparsed) timestamp string of the first row, for comparison with `d`.
df['Local time'].iloc[0]

'01.01.2021 00:00:00.000 GMT-0800'

In [183]:
# Calendar date of `d`. The cell was saved as the incomplete expression `d.`
# (a syntax error); the recorded datetime.date output corresponds to this call.
d.date()

datetime.date(2021, 1, 1)

In [179]:
# Select rows whose 'Date' string contains the ISO date of `d`.
# NOTE(review): the recorded output lists a 'Local time' column and no 'Date'
# column, and the file with 'Date' is only loaded in a later cell — confirm
# which frame this filter is meant to run against.
df[df['Date'].str.contains(d.date().isoformat())]

Unnamed: 0,Local time,Open,High,Low,Close,Volume


In [181]:
# Load a second AAPL CSV from another project directory; this rebinds `df`.
# NOTE(review): the relative path reaches outside this notebook's folder — confirm it exists.
df = pd.read_csv('../cs229-master/MyData/AAPL.csv')

In [185]:
# Number of distinct values in the 'Date' column.
len(df['Date'].unique())

38