<a href="https://colab.research.google.com/github/codingbear314/Spark/blob/main/FinanceModel001.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Loading Data from yahoo finance

In [53]:
import yfinance as yf
import torch
from collections import deque

def Load_Dataset():
  dataset = []

  company_list = [
                    "AAPL", "MSFT", "AMZN", "GOOGL", "GOOG", "META", "TSLA", "NVDA",
    "JPM", "JNJ", "V", "PG", "UNH", "HD", "MA", "DIS", "NFLX", "PYPL", "INTC",
    "CSCO", "PEP", "KO", "MRK", "PFE", "ABBV", "CVX", "XOM", "BA", "MCD",
    "NKE", "SBUX", "GE", "IBM", "CAT", "MMM", "HON", "AXP", "GS", "WFC",
    "BAC", "C", "MS", "SCHW", "BLK", "COST", "WMT", "TGT", "LOW", "CVS",
    "WBA", "KR", "T", "VZ", "TMUS", "QCOM", "AVGO", "TXN", "AMD", "MU",
    "CRM", "ORCL", "ADBE", "NOW", "SHOP", "SQ", "TWLO", "ZM", "PLTR", "SNOW",
    "DOCU", "CRWD", "OKTA", "RNG", "TEAM", "WDAY", "MDB", "ESTC",
    "DDOG", "ZS", "FTNT", "PANW", "ZI", "HUBS", "WIX", "SQSP", "TTD",
    "ROKU", "SNAP", "PINS", "ETSY", "CHWY", "PTON", "BYND", "FVRR", "UPWK",
    "Z", "RDFN"
                  ]

  for company in company_list:
      slidingWindow = deque()
      ticker = yf.Ticker(company)
      data = ticker.history(interval = '1d', period = 'max', auto_adjust = True)

      Open = list(data['Open'])
      Close = list(data['Close'])
      High = list(data['High'])
      Low = list(data['Low'])

      if len(Open) < 61:
          print(f"{company} Doesn't have enough data!")
          continue
      addindex = 61
      for i in range(addindex):
          slidingWindow.append(torch.tensor([100000*Open[i], 100000*High[i], 100000*Low[i], 100000*Close[i]], dtype = torch.float32))
      while addindex+1 < len(Open):
          dataset.append(
              (
                  torch.stack(list(slidingWindow)[:-1]),
                  list(slidingWindow)[-1]
              )
          )
          addindex += 1
          slidingWindow.append(torch.tensor([100000*Open[addindex], 100000*High[addindex], 100000*Low[addindex], 100000*Close[addindex]], dtype = torch.float32))
          slidingWindow.popleft()
  return dataset

### Mount Drive

In [54]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Model structure

* 60*4 input layer
* Convolution layer (Conv1d, size of 8, input channel 4 to output channel 16)
* Convolution layer (Conv1d, size of 8, input channel 16 to output channel 32)
* Fully connected layer (Linear, size of 40 to 20)
* Fully connected layer (Linear, size of 20 to 4)
* 4 output layer

All activations ReLU

In [55]:
import torch
import torch.nn as nn

class Finance_001_Model(nn.Module):
    def __init__(self):
        super(Finance_001_Model, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=4, out_channels=16, kernel_size=8)
        conv1_out_side = 60 - 8 + 1
        self.conv2 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=8)
        conv2_out_side = conv1_out_side - 8 + 1
        self.dense1 = nn.Linear(32 * conv2_out_side, 40)
        self.dense2 = nn.Linear(40, 20)
        self.dense3 = nn.Linear(20, 4)
        self.relu = nn.ReLU()

    def forward(self, x):
      x = x.permute(0, 2, 1)
      x = self.relu(self.conv1(x))
      x = self.relu(self.conv2(x))
      batch_size = x.size(0)
      x = x.view(batch_size, -1)
      x = self.relu(self.dense1(x))
      x = self.relu(self.dense2(x))
      x = self.dense3(x)
      return x

### Data Loader

In [56]:
class FinanceDataset(torch.utils.data.Dataset):
    def __init__(self, dataset):
        self.dataset = dataset
    def __len__(self):
        return len(self.dataset)
    def __getitem__(self, idx):
        return self.dataset[idx]

### Split the dataset

In [57]:
Financedataset = FinanceDataset(Load_Dataset())
train_size = int(0.8 * len(Financedataset))
validation_size = int(0.1 * len(Financedataset))
test_size = len(Financedataset) - train_size - validation_size
train_dataset, test_dataset, validation_dataset = torch.utils.data.random_split(Financedataset, [train_size, test_size, validation_size])

print(f"Train size: {len(train_dataset)}")
print(f"Validation size: {len(validation_dataset)}")
print(f"Test size: {len(test_dataset)}")

Train_Dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
Validation_Dataloader = torch.utils.data.DataLoader(validation_dataset, batch_size=32, shuffle=True)
Test_Dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=True)

Train size: 565646
Validation size: 70705
Test size: 70707


In [58]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = Finance_001_Model().to(device)
print(f"Training on {device}")

Training on cuda


In [64]:
critertion = nn.MSELoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.00001)

### Load the model

In [63]:
model.load_state_dict(torch.load("/content/drive/My Drive/Spark/best_model.pt"))

<All keys matched successfully>

### Train the model

In [65]:
def train(model, train_dataloader, validation_dataloader, critertion, optimizer, epochs):
    lowest_val_loss = float('inf')
    for epoch in range(epochs):
      model.train()
      epoch_loss = 0.0
      for X_batch, Y_batch in train_dataloader:
        optimizer.zero_grad()
        output = model(X_batch.to(device))
        loss = critertion(output, Y_batch.to(device))
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item() * X_batch.size(0)
      train_loss = epoch_loss / len(train_dataloader.dataset)

      model.eval()
      validation_loss = 0.0
      with torch.inference_mode():
        for X_batch, Y_batch in validation_dataloader:
          output = model(X_batch.to(device))
          loss = critertion(output, Y_batch.to(device))
          validation_loss += loss.item() * X_batch.size(0)
      validation_loss = validation_loss / len(validation_dataloader.dataset)

      print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Validation Loss: {validation_loss:.4f}")
      if validation_loss < lowest_val_loss:
        lowest_val_loss = validation_loss
        torch.save(model.state_dict(), f"/content/drive/My Drive/Spark/best_model.pt")
        print(f"\tNew best model with loss {validation_loss} saved.")
      if (epoch+1) % 10 == 0:
        torch.save(model.state_dict(), f"/content/drive/My Drive/Spark/model_{epoch+1}.pt")
      if epoch == epochs-1:
        torch.save(model.state_dict(), f"/content/drive/My Drive/Spark/model_latest.pt")
        print(f"Latest model with loss {validation_loss} saved")

    print("Training finished")
    print(f"Lowest validation loss: {lowest_val_loss}")

In [66]:
train(model, Train_Dataloader, Validation_Dataloader, critertion, optimizer, 10)

Epoch 1/10, Train Loss: 33412043966.0419, Validation Loss: 28309531386.9347
	New best model with loss 28309531386.934673 saved.
Epoch 2/10, Train Loss: 33387487339.3212, Validation Loss: 28657148128.0801
Epoch 3/10, Train Loss: 33359711853.1758, Validation Loss: 28264628260.1488
	New best model with loss 28264628260.148846 saved.
Epoch 4/10, Train Loss: 33292674316.4437, Validation Loss: 28247993148.4689
	New best model with loss 28247993148.468933 saved.
Epoch 5/10, Train Loss: 33354130760.4481, Validation Loss: 28405871996.6635
Epoch 6/10, Train Loss: 33319832935.9332, Validation Loss: 30257446321.3299
Epoch 7/10, Train Loss: 33302069887.0518, Validation Loss: 28421441385.1915
Epoch 8/10, Train Loss: 33348124902.4338, Validation Loss: 28328918796.4153
Epoch 9/10, Train Loss: 33367425214.0419, Validation Loss: 28385122297.3959
Epoch 10/10, Train Loss: 33369821444.4724, Validation Loss: 28357618073.6869
Latest model with loss 28357618073.686897 saved
Training finished
Lowest validation

### Using the model

In [70]:
model = Finance_001_Model().to(device)
model.load_state_dict(torch.load("/content/drive/My Drive/Spark/best_model.pt"))

<All keys matched successfully>

In [105]:
Current_Apple_Stocks = yf.Ticker('AMZN')

# Get 60 data in 1day interval
data = Current_Apple_Stocks.history(interval = '1d', period = 'max', auto_adjust = True)

Open = list(data['Open'])[-61:]
Close = list(data['Close'])[-61:]
High = list(data['High'])[-61:]
Low = list(data['Low'])[-61:]

print(f"Last day : \nAverage :\t{(Open[-2]+Close[-2])/2}\nOpen :\t{Open[-2]}\nClose :\t{Close[-2]}\nHigh :\t{High[-2]}\nLow :\t{Low[-2]}")

# Make it into a tensor
datat = []
for i in range(60):
  datat.append(torch.tensor([100000*Open[i], 100000*High[i], 100000*Low[i], 100000*Close[i]], dtype = torch.float32))
datat = torch.stack(datat)
datat = torch.unsqueeze(datat, 0)

# Predict the next day
with torch.inference_mode():
  output = model(datat.to(device))

  output = output.tolist()[0]
  for i in range(4):
    output[i] = output[i] / 100000
  print(f"Model predicted : \nAverage :\t{(output[0]+output[3])/2} ({'↑' if (output[0]+output[3])/2 > (Open[-2]+Close[-2])/2 else '↓'})\nOpen :\t{output[0]} ({'↑' if output[0] > Open[-2] else '↓'})\nClose :\t{output[3]} ({'↑' if output[3] > Close[-2] else '↓'})\nHigh :\t{output[1]}\nLow :\t{output[2]}")

print(f"Actual stock : \nAverage : \t{(Open[-1] + Close[-1])/2} ({'↑' if (Open[-1]+Close[-1])/2 > (Open[-2]+Close[-2])/2 else '↓'})\nOpen :\t{Open[-1]} ({'↑' if Open[-1] > Open[-2] else '↓'})\nClose :\t{Close[-1]} ({'↑' if Close[-1] > Close[-2] else '↓'})\nHigh :\t{High[-1]}\nLow :\t{Low[-1]}")

import pandas as pd

dta = pd.DataFrame(columns = ['Average', 'Open', 'High', 'Low', 'Close'], index = ['Predicted', 'Real', 'Difference'])
dta.loc['Predicted'] = [round((output[0]+output[3])/2,4), output[0], output[1], output[2], output[3]]
dta.loc['Real'] = [round((Open[-1] + Close[-1])/2,4), round(Open[-1], 4), round(High[-1], 4), round(Low[-1], 4), round(Close[-1], 4)]
dta.loc['Difference'] = [(output[0]+output[3])/2-(Open[-1] + Close[-1])/2, output[0]-Open[-1], output[1]-High[-1], output[2]-Low[-1], output[3]-Close[-1]]
print()
print()
print(dta)

Last day : 
Average :	196.43000030517578
Open :	195.00999450683594
Close :	197.85000610351562
High :	199.83999633789062
Low :	194.1999969482422
Model predicted : 
Average :	197.64181 (↑)
Open :	197.6052 (↑)
Close :	197.67842 (↓)
High :	199.78774
Low :	195.51698
Actual stock : 
Average : 	195.48999786376953 (↓)
Open :	197.72999572753906 (↑)
Close :	193.25 (↓)
High :	198.85000610351562
Low :	192.5


             Average      Open       High        Low      Close
Predicted   197.6418  197.6052  199.78774  195.51698  197.67842
Real          195.49    197.73     198.85      192.5     193.25
Difference  2.151812 -0.124796   0.937734    3.01698    4.42842


In [103]:
company_list = [
    "AAPL", "MSFT", "GOOGL", "AMZN", "TSLA", "BRK-B", "V", "JNJ", "WMT",
    "JPM", "NVDA", "PG", "DIS", "PYPL", "MA", "UNH", "HD", "VZ", "ADBE",
    "NFLX", "CMCSA", "KO", "NKE", "MRK", "PFE", "PEP", "INTC", "T", "ABT",
    "CSCO", "XOM", "CVX", "CRM", "ABBV", "WFC", "MDT", "BMY", "MCD", "COST",
    "TMO", "NEE", "ACN", "AVGO", "TXN", "HON", "QCOM", "PM", "LLY", "UNP",
    "LOW", "AMGN", "ORCL", "LIN", "DHR", "IBM", "SBUX", "MS", "RTX", "CAT",
    "ISRG", "EL", "GS", "LMT", "GE", "BLK", "AMD", "AMT", "BA", "PLD",
    "CVS", "SYK", "MDLZ", "ADI", "SPGI", "TGT", "AMAT", "NOW", "DE", "CB",
    "ADI", "FIS", "MMC", "ICE", "NSC", "CI", "SCHW", "SO", "GILD", "CSX",
    "PNC", "USB", "TFC", "EQIX", "CME", "ADP", "BDX", "DUK", "APD", "ITW",
    "CCI", "AON", "MET", "CL", "ECL", "WM", "SHW", "HUM", "TROW", "ETN",
    "D", "PSA", "AEP", "CMG", "MAR", "MCO", "EXC", "DLR", "FISV", "ADM",
    "APH", "PGR", "SRE", "AIG", "BK", "NOC", "TRV", "PRU", "PEG", "FTNT",
    "RSG", "MPC", "SYY", "LHX", "MCK", "ALL", "PAYX", "JCI", "CNC", "PSX",
    "HCA", "ROP", "AJG", "PH", "CTAS", "CTSH", "EA", "ROK", "KMB", "A",
    "ED", "IFF", "WELL", "YUM", "HES", "SWK", "TSN", "DTE", "IDXX", "AWK",
    "PPL", "ANET", "WAT", "CARR", "MTD", "FITB", "ZTS", "ETR", "KMI", "VLO",
    "GRMN", "WAB", "XYL", "ATO", "FTV", "EVRG", "FMC", "CMS", "GPC", "HSY",
    "DOV", "NDAQ", "NTRS", "FE", "SIVB", "AMP", "ES", "AEE", "ALB", "REGN",
    "DHI", "STZ", "AVB", "HBAN", "LEN", "MLM", "VFC", "FLT", "LKQ", "ODFL",
    "TSCO", "LUV", "JKHY", "DG", "MAS", "RJF", "CDNS", "ATO", "DRI", "LNC"
]

correct = 0
wrong = 0

for company in company_list:
  Current_Apple_Stocks = yf.Ticker(company)

  # Get 60 data in 1day interval
  data = Current_Apple_Stocks.history(interval = '1d', period = 'max', auto_adjust = True)

  Open = list(data['Open'])[-61:]
  Close = list(data['Close'])[-61:]
  High = list(data['High'])[-61:]
  Low = list(data['Low'])[-61:]

  if len(Open) < 61:
      print(f"{company} Doesn't have enough data!")
      continue

  print(f"Last day : \nAverage :\t{(Open[-2]+Close[-2])/2}\nOpen :\t{Open[-2]}\nClose :\t{Close[-2]}\nHigh :\t{High[-2]}\nLow :\t{Low[-2]}")

  # Make it into a tensor
  datat = []
  for i in range(60):
    datat.append(torch.tensor([100000*Open[i], 100000*High[i], 100000*Low[i], 100000*Close[i]], dtype = torch.float32))
  datat = torch.stack(datat)
  datat = torch.unsqueeze(datat, 0)

  # Predict the next day
  with torch.inference_mode():
    output = model(datat.to(device))

    output = output.tolist()[0]
    for i in range(4):
      output[i] = output[i] / 100000

    average = (output[0]+output[3])/2
    real_average = (Open[-1]+Close[-1])/2
    yesterday_average = (Open[-2]+Close[-2])/2

    if (average > yesterday_average) == (real_average > yesterday_average):
      correct+=1
    else:
      wrong+=1
print(correct, wrong)

Last day : 
Average :	214.39500427246094
Open :	214.69000244140625
Close :	214.10000610351562
High :	215.74000549316406
Low :	212.35000610351562
Last day : 
Average :	452.51499938964844
Open :	452.17999267578125
Close :	452.8500061035156
High :	456.1700134277344
Low :	451.7699890136719
Last day : 
Average :	184.7949981689453
Open :	184.17999267578125
Close :	185.41000366210938
High :	186.0500030517578
Low :	184.02000427246094
Last day : 
Average :	196.43000030517578
Open :	195.00999450683594
Close :	197.85000610351562
High :	199.83999633789062
Low :	194.1999969482422
Last day : 
Average :	196.2949981689453
Open :	195.1699981689453
Close :	197.4199981689453
High :	198.72000122070312
Low :	194.0500030517578
Last day : 
Average :	409.1050109863281
Open :	410.260009765625
Close :	407.95001220703125
High :	410.260009765625
Low :	406.3399963378906
Last day : 
Average :	267.5500030517578
Open :	268.510009765625
Close :	266.5899963378906
High :	270.5899963378906
Low :	266.5
Last day : 
Average

ERROR:yfinance:SIVB: Period 'max' is invalid, must be one of ['1d', '5d']


Last day : 
Average :	38.51999855041504
Open :	38.5099983215332
Close :	38.529998779296875
High :	38.68000030517578
Low :	38.33000183105469
SIVB Doesn't have enough data!
Last day : 
Average :	431.36500549316406
Open :	432.3900146484375
Close :	430.3399963378906
High :	433.04998779296875
Low :	429.6600036621094
Last day : 
Average :	57.04999923706055
Open :	56.90999984741211
Close :	57.189998626708984
High :	57.65999984741211
Low :	56.77000045776367
Last day : 
Average :	70.56000137329102
Open :	70.18000030517578
Close :	70.94000244140625
High :	71.08000183105469
Low :	70.18000030517578
Last day : 
Average :	97.375
Open :	98.4800033569336
Close :	96.2699966430664
High :	99.0999984741211
Low :	94.87999725341797
Last day : 
Average :	1061.0399780273438
Open :	1071.219970703125
Close :	1050.8599853515625
High :	1073.050048828125
Low :	1047.010009765625
Last day : 
Average :	140.80500030517578
Open :	140.41000366210938
Close :	141.1999969482422
High :	141.42999267578125
Low :	138.990005493

ERROR:yfinance:$FLT: possibly delisted; No timezone found


Last day : 
Average :	13.775000095367432
Open :	13.75
Close :	13.800000190734863
High :	13.949999809265137
Low :	13.609999656677246
FLT Doesn't have enough data!
Last day : 
Average :	41.61000061035156
Open :	41.75
Close :	41.470001220703125
High :	41.779998779296875
Low :	40.95000076293945
Last day : 
Average :	175.1449966430664
Open :	175.9199981689453
Close :	174.3699951171875
High :	177.1999969482422
Low :	173.64999389648438
Last day : 
Average :	266.864990234375
Open :	266.17999267578125
Close :	267.54998779296875
High :	268.94000244140625
Low :	265.0
Last day : 
Average :	28.339999198913574
Open :	28.209999084472656
Close :	28.469999313354492
High :	28.520000457763672
Low :	27.979999542236328
Last day : 
Average :	166.9000015258789
Open :	167.0800018310547
Close :	166.72000122070312
High :	168.19000244140625
Low :	166.0399932861328
Last day : 
Average :	129.20999908447266
Open :	128.13999938964844
Close :	130.27999877929688
High :	130.60000610351562
Low :	127.69999694824219
Last 

In [104]:
print(f"Total number of companies : {correct+wrong}")
print(f"Correct count : {correct}")
print(f"Incorrect count : {wrong}")
print(f"Accuracy : {(correct)/(correct+wrong)*100}%")

Total number of companies : 207
Correct count : 136
Incorrect count : 71
Accuracy : 65.70048309178745%
