<a href="https://colab.research.google.com/github/codingbear314/Spark/blob/main/FinanceModel001.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Loading Data from yahoo finance

In [1]:
import yfinance as yf
import torch
from collections import deque

def Load_Dataset():
  dataset = []

  company_list = [
                    'AAPL',
                    'GOOG',
                    'NVDA',
                    'TSLA',
                    '005930.KS',
                    '000660.KS',
                    '035420.KS',
                    '035720.KS',
                    'MSFT',
                    'GOOGL',
                    'AMZN',
                    'META',
                    'AMD',
                    'V',
                    'BRK-B',
                    'JNJ',
                    'BABA',
                    'TSM',
                    'PG'
                  ]

  for company in company_list:
      slidingWindow = deque()
      ticker = yf.Ticker(company)
      data = ticker.history(interval = '1d', period = 'max', auto_adjust = True)

      Open = list(data['Open'])
      Close = list(data['Close'])
      High = list(data['High'])
      Low = list(data['Low'])

      if len(Open) < 61:
          print(f"{company} Doesn't have enough data!")
          continue
      addindex = 61
      for i in range(addindex):
          slidingWindow.append(torch.tensor([100000*Open[i], 100000*High[i], 100000*Low[i], 100000*Close[i]], dtype = torch.float32))
      while addindex+1 < len(Open):
          dataset.append(
              (
                  torch.stack(list(slidingWindow)[:-1]),
                  list(slidingWindow)[-1]
              )
          )
          addindex += 1
          slidingWindow.append(torch.tensor([100000*Open[addindex], 100000*High[addindex], 100000*Low[addindex], 100000*Close[addindex]], dtype = torch.float32))
          slidingWindow.popleft()
  return dataset

### Mount Drive

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Model structure

* 60*4 input layer
* Convolution layer (Conv1d, size of 8, input channel 4 to output channel 16)
* Convolution layer (Conv1d, size of 8, input channel 16 to output channel 32)
* Fully connected layer (Linear, size of 40 to 20)
* Fully connected layer (Linear, size of 20 to 4)
* 4 output layer

All activations ReLU

In [3]:
import torch
import torch.nn as nn

class Finance_001_Model(nn.Module):
    def __init__(self):
        super(Finance_001_Model, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=4, out_channels=16, kernel_size=8)
        conv1_out_side = 60 - 8 + 1
        self.conv2 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=8)
        conv2_out_side = conv1_out_side - 8 + 1
        self.dense1 = nn.Linear(32 * conv2_out_side, 40)
        self.dense2 = nn.Linear(40, 20)
        self.dense3 = nn.Linear(20, 4)
        self.relu = nn.ReLU()

    def forward(self, x):
      x = x.permute(0, 2, 1)
      x = self.relu(self.conv1(x))
      x = self.relu(self.conv2(x))
      batch_size = x.size(0)
      x = x.view(batch_size, -1)
      x = self.relu(self.dense1(x))
      x = self.relu(self.dense2(x))
      x = self.dense3(x)
      return x

### Data Loader

In [4]:
class FinanceDataset(torch.utils.data.Dataset):
    def __init__(self, dataset):
        self.dataset = dataset
    def __len__(self):
        return len(self.dataset)
    def __getitem__(self, idx):
        return self.dataset[idx]

### Split the dataset

In [5]:
FinanceDataset = FinanceDataset(Load_Dataset())
train_size = int(0.8 * len(FinanceDataset))
validation_size = int(0.1 * len(FinanceDataset))
test_size = len(FinanceDataset) - train_size - validation_size
train_dataset, test_dataset, validation_dataset = torch.utils.data.random_split(FinanceDataset, [train_size, test_size, validation_size])

print(f"Train size: {len(train_dataset)}")
print(f"Validation size: {len(validation_dataset)}")
print(f"Test size: {len(test_dataset)}")

Train_Dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
Validation_Dataloader = torch.utils.data.DataLoader(validation_dataset, batch_size=32, shuffle=True)
Test_Dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=True)

Train size: 108816
Validation size: 13602
Test size: 13603


In [6]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = Finance_001_Model().to(device)
print(f"Training on {device}")

Training on cuda


In [7]:
critertion = nn.MSELoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

### Load the model

In [8]:
model.load_state_dict(torch.load("/content/drive/My Drive/Spark/model_21.pt"))

<All keys matched successfully>

### Train the model

In [12]:
def train(model, train_dataloader, validation_dataloader, critertion, optimizer, epochs):
    lowest_val_loss = float('inf')
    for epoch in range(epochs):
      model.train()
      epoch_loss = 0.0
      for X_batch, Y_batch in train_dataloader:
        optimizer.zero_grad()
        output = model(X_batch.to(device))
        loss = critertion(output, Y_batch.to(device))
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item() * X_batch.size(0)
      train_loss = epoch_loss / len(train_dataloader.dataset)

      model.eval()
      validation_loss = 0.0
      with torch.inference_mode():
        for X_batch, Y_batch in validation_dataloader:
          output = model(X_batch.to(device))
          loss = critertion(output, Y_batch.to(device))
          validation_loss += loss.item() * X_batch.size(0)
      validation_loss = validation_loss / len(validation_dataloader.dataset)

      print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Validation Loss: {validation_loss:.4f}")
      if validation_loss < lowest_val_loss:
        lowest_val_loss = validation_loss
        torch.save(model.state_dict(), f"/content/drive/My Drive/Spark/best_model.pt")
      if (epoch+1) % 10 == 0:
        torch.save(model.state_dict(), f"/content/drive/My Drive/Spark/model_{epoch+1}.pt")

In [None]:
train(model, Train_Dataloader, Validation_Dataloader, critertion, optimizer, 100)

Epoch 1/100, Train Loss: 10857947780538500.0000, Validation Loss: 15062899117218174.0000
Epoch 2/100, Train Loss: 10785193893884118.0000, Validation Loss: 6277810411159329.0000
Epoch 3/100, Train Loss: 11208698329095952.0000, Validation Loss: 8392654671720947.0000
Epoch 4/100, Train Loss: 10794569711622302.0000, Validation Loss: 5535583741290856.0000
Epoch 5/100, Train Loss: 10403450123683722.0000, Validation Loss: 6165869712081561.0000
Epoch 6/100, Train Loss: 13183485935090028.0000, Validation Loss: 6128452223791353.0000
Epoch 7/100, Train Loss: 9802963069023158.0000, Validation Loss: 10628638929562594.0000
Epoch 8/100, Train Loss: 10538949644571538.0000, Validation Loss: 14973881210106184.0000
Epoch 9/100, Train Loss: 11556666845843356.0000, Validation Loss: 7953207129890321.0000
Epoch 10/100, Train Loss: 9955421552362484.0000, Validation Loss: 5579894407315807.0000
Epoch 11/100, Train Loss: 11781470549942482.0000, Validation Loss: 40162329170674896.0000
Epoch 12/100, Train Loss: 10