We start with one input:
Date - specifies trading date

We predict five outputs:
Open - opening price
High - maximum price during the day
Low - minimum price during the day
Close - close price adjusted for splits
Adj Close - adjusted close price adjusted for both dividends and splits.

https://www.kaggle.com/datasets/jacksoncrow/stock-market-dataset?resource=download-directory&select=stocks 

In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset
import matplotlib.pyplot as plt

from pathlib import Path
import pandas
import nasdaqdatalink as datalink
NASDAQ_DATA_LINK_API_KEY = "XBH69AuacmmTQ5-HbcSi"

In [101]:
# load dataset
#transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
# Training dataset

# Custom Dataset class for our custom data
class StockDataset(Dataset):
    # Calculates difference in days between current date and start date
    def _CalculateDayNum(self, startYear, startMonth, startDay, year, month, day):
        # How many days are in each month
        daysByMonth = {
            1: 31,
            2: 28,
            3: 31,
            4: 30,
            5: 31,
            6: 30,
            7: 31,
            8: 31,
            9: 30,
            10: 31,
            11: 30,
            12: 31
        }
        # There is a leap year every four years
        yearDiff = year - startYear
        leapDays = yearDiff // 4
        # If the years are both leap years, we may have one more or one less leap day than expected
        if startYear % 4 == 0 and year % 4 == 0:
            # We start by subtracting a leap day
            leapDays -= 1

        # If startYear is a leap year and the start date is before the leap day, we increment the number of leap days
        if startYear % 4 == 0 and startMonth <= 2 and startDay < 29:
            leapDays += 1

        testYear = startYear + 1
        # After dividing yearDiff by four, we take the years in between of the remainder and determine if there is an extra leap year
        for i in range(yearDiff % 4 - 1):
            if testYear % 4 == 0:
                leapDays += 1
                break
            testYear += 1

        # If year is a leap year and the end date is after the leap day, we increment the number of leap days
        if year % 4 == 0 and month > 2:
            leapDays += 1

        if leapDays < 0:
            leapDays = 0

        startDaysOutOfYear = 0
        daysOutOfYear = 0
        # Adds the days for each month except the last month
        for m in range(1, startMonth):
            startDaysOutOfYear += daysByMonth[m]
        # Adds the days for the last month
        startDaysOutOfYear += startDay
        # Same thing
        for m in range(1, month):
            daysOutOfYear += daysByMonth[m]
        daysOutOfYear += day

        # Maybe more days passed in the start year than the current year in which case the second term will be negative
        return yearDiff * 365 + (daysOutOfYear - startDaysOutOfYear) + leapDays
    
    # Calculates date based on day number and start date
    def _CalculateDate(self, dayNum):
        # How many days are in each month
        daysByMonth = {
            1: 31,
            2: 28,
            3: 31,
            4: 30,
            5: 31,
            6: 30,
            7: 31,
            8: 31,
            9: 30,
            10: 31,
            11: 30,
            12: 31
        }
        year = self._startYear
        month = self._startMonth
        day = self._startDay
        leapYear = False
        if year % 4 == 0:
            leapYear = True
        if dayNum > 0:
            # Take remaining days out of first month and subtract from total days, if possible
            if leapYear and month == 2:
                daysInMonth = 29
            else:
                daysInMonth = daysByMonth[month]
            if dayNum > daysInMonth - day:
                dayNum -= daysInMonth - day
                day = 0
                month += 1
                if month > 12:
                    month = 1
                    year += 1
            # If not possible we add dayNum to day which may roll over to next month
            else:
                day += dayNum
                if day > daysInMonth:
                    day %= daysInMonth
                    month += 1
                    if month > 12:
                        month = 1
                        year += 1
                dayNum = 0
                
            # Now keep substracting the days in each month from dayNum until we are on the last month
            if year % 4 == 0:
                leapYear = True
            else:
                leapYear = False
            if leapYear and month == 2:
                daysInMonth = 29
            else:
                daysInMonth = daysByMonth[month]
            while dayNum > daysInMonth:
                dayNum -= daysInMonth
                month += 1
                if month > 12:
                    month = 1
                    year += 1
                if year % 4 == 0:
                    leapYear = True
                else:
                    leapYear = False
                if leapYear and month == 2:
                    daysInMonth = 29
                else:
                    daysInMonth = daysByMonth[month]

            # For last month we add remaining days
            day += dayNum
        # Negative case
        elif dayNum < 0:
            # We add days remaining in first month if possible
            if abs(dayNum) >= day:
                dayNum += day
                month -= 1
                if month < 1:
                    month = 12
                    year -= 1
                # We need to change month and year before we check them
                if year % 4 == 0:
                    leapYear = True
                else:
                    leapYear = False
                if leapYear and month == 2:
                    daysInMonth = 29
                else:
                    daysInMonth = daysByMonth[month]
                day = daysInMonth
            # Otherwise we subtract absolute value of dayNum from days
            else:
                day -= abs(dayNum)
                dayNum = 0
                daysInMonth = 30
            
            # We decrease months one at a time until we reach the last month
            while abs(dayNum) >= daysInMonth:
                dayNum += daysInMonth
                month -= 1
                if month < 1:
                    month = 12
                    year -= 1
                if year % 4 == 0:
                    leapYear = True
                else:
                    leapYear = False
                if leapYear and month == 2:
                    daysInMonth = 29
                else:
                    daysInMonth = daysByMonth[month]
                day = daysInMonth

            # We subtract remaining days from end of last month
            day -= abs(dayNum)

        # Convert to date format
        s = str(year) + "-"
        if month < 10:
            s += "0" + str(month) + "-"
        else:
            s += str(month) + "-"
        if day < 10:
            s += "0" + str(day)
        else:
            s += str(day)
        return s
            
    
    def GetStartDate(self):
        return self._startYear, self._startMonth, self._startDay
    
    # Convert dates back to string format in testing datasets
    def _ResetDates(self):
        if self._startYear != -1 and self._startMonth != -1 and self._startDay != -1:
            # numpy arrays of certain type have an extra field
            self._npArray = self._npArray.astype(object)
            for i in range(len(self._npArray) - 1):
                self._npArray[i][0] = self._CalculateDate(self._npArray[i][0])
            self._startYear = -1
            self._startMonth = -1
            self._startDay = -1
            
    
    # For converting dates to whole numbers in testing datasets
    def SetStartDate(self, startYear, startMonth, startDay):
        # Convert to dates if not already converted
        self._ResetDates()
        # Set start date
        self._startYear = startYear
        self._startMonth = startMonth
        self._startDay = startDay

        for i in range(len(self._npArray)):
            # The first entry in the ith row is the current date in string format
            date = self._npArray[i][0]
            # Four digits for year, two for month, two for day, all seperated by hypens
            year = int(date[:4])
            month = int(date[5:7])
            day = int(date[8:])
            # If current date is less, we give it to _CalculateDayNum() first because it expects the first date to be less than or equal to the second
            # Then we negate it because we are expecting a negative value here
            if year < startYear:
                self._npArray[i][0] = -self._CalculateDayNum(year, month, day, startYear, startMonth, startDay)
            elif year == startYear:
                if month < startMonth:
                    self._npArray[i][0] = -self._CalculateDayNum(year, month, day, startYear, startMonth, startDay)
                elif month == startMonth:
                    if day < startDay:
                        self._npArray[i][0] = -self._CalculateDayNum(year, month, day, startYear, startMonth, startDay)
            else:
                self._npArray[i][0] = self._CalculateDayNum(startYear, startMonth, startDay, year, month, day)
        
        self._npArray = self._npArray.astype('double') 
        self._tensor = torch.from_numpy(self._npArray).float()

    def __init__(self, csvPath, train):
        npArray = pandas.read_csv(csvPath).to_numpy()
        #self._path = csvPath

        if train:
            # We need to find the minimum date since our _CalculateDayNum() function assumes start date <= current date
            minIndex = 0
            for i in range(1, len(npArray)):
                # The first entry in the ith row is the current date in string format
                date = npArray[i][0]
                # Four digits for year, two for month, two for day, all seperated by hypens
                year = int(date[:4])
                month = int(date[5:7])
                day = int(date[8:])
                # For comparison
                minDate = npArray[minIndex][0]
                minYear = int(minDate[:4])
                minMonth = int(minDate[5:7])
                minDay = int(date[8:])
                # If current date is less, we make it the minimum date
                if year < minYear:
                    minIndex = i
                elif year == minYear:
                    if month < minMonth:
                        minIndex = i
                    elif month == minMonth:
                        if day < minDay:
                            minIndex = i

            self._startYear = int(npArray[minIndex][0][:4])
            self._startMonth = int(npArray[minIndex][0][5:7])
            self._startDay = int(npArray[minIndex][0][8:])
            # The first day
            npArray[minIndex][0] = 0

            for i in range(len(npArray)):
                # Convert all dates into whole numbers relative to start date
                if i != minIndex:
                    row = npArray[i]
                    date = row[0]
                    year = int(date[:4])
                    month = int(date[5:7])
                    day = int(date[8:])
                    row[0] = self._CalculateDayNum(self._startYear, self._startMonth, self._startDay, year, month, day)

            # We can now make a tensor object
            self._npArray = npArray.astype('double')
            self._tensor = torch.from_numpy(self._npArray).float()
        # We do not know what the start date for testing datasets is at initialization time
        else:
            self._npArray = npArray
            self._startYear = -1
            self._startMonth = -1
            self._startDay = -1
        #print(self._npArray[len(self._npArray) - 1][0])

    def __len__(self):
        return len(self._tensor)

    def __getitem__(self, index):
        # Use index to read corresponding file
        #csvPath = Path(self._folder) / self._stockNames[index]
        # Converts .csv data to tensor
        #torch.tensor(pandas.read_csv(csvPath))
        
        row = self._tensor[index]
        #print(row)
        day = row[0]
        open, high, low, close, adjClsoe = row[1:6]
        
        #print(startDate)
        #print(startYear, startMonth, startDay, sep = '-')
        #npArray = npArray.astype('f')
        #print(npArray)

        #stockData = torch.tensor(npArray)
        #stockData = torch.tensor((pandas.read_csv(csvPath).to_numpy()).tolist())
        return day, open, high, low, close, adjClsoe

# Training datasets
train_dataset_path1 = "C:/Users/simsd/projects/StockPrices/Training_Datasets/DIS.csv" # Put training path here
train_dataset_path2 = "C:/Users/simsd/projects/StockPrices/Training_Datasets/GE.csv" # Put training path here
train_dataset_path3 = "C:/Users/simsd/projects/StockPrices/Training_Datasets/HPQ.csv" # Put training path here
train_dataset1 = StockDataset(train_dataset_path1, train = True)
train_dataset2 = StockDataset(train_dataset_path2, train = True)
train_dataset3 = StockDataset(train_dataset_path3, train = True)
#print(train_dataset1._tensor) # Accessing "private" class member

# Test
#train_dataset1.__getitem__(0)

# Testing datasets
test_dataset_path1 = "C:/Users/simsd/projects/StockPrices/Testing_Datasets/AA.csv" # Put testing path here
test_dataset_path2 = "C:/Users/simsd/projects/StockPrices/Testing_Datasets/ARNC.csv" # Put testing path here
test_dataset_path3 = "C:/Users/simsd/projects/StockPrices/Testing_Datasets/IP.csv" # Put testing path here
test_dataset1 = StockDataset(test_dataset_path1, train = False)
test_dataset2 = StockDataset(test_dataset_path2, train = False)
test_dataset3 = StockDataset(test_dataset_path3, train = False)
#print(test_dataset1._npArray) # Accessing "private" class member

# Data loaders
train_loader = DataLoader(train_dataset1, batch_size = 64, shuffle = True, drop_last = True)

test_loader = DataLoader(test_dataset1, batch_size = 1000, shuffle = False)

In [None]:
# Define the neural network class
batchSize = 64
class NeuralNet(nn.Module):
    def __init__(self):
        super(NeuralNet, self).__init__()
        #1st  layer: map (1 * batch size) input day to (128) hidden neurons
        self.fc1 = nn.Linear(1 * batchSize, 128)
        #2nd layer: map (128) hidden neurons to (256) hidden neurons
        self.fc2 = nn.Linear(128, 256)
        #3rd layer: map (256) hidden neurons to (5 * batch size) outputs representing stock prices for input day relative to some start date
        self.fc3 = nn.Linear(256, 5 * batchSize)

# Use “forward” to define the computing flow. ReLu activation is applied. 
    def forward(self, x):
        #x = x.view(-1, 28 * 28)  
        # First layer computation
        #print(x)
        x = F.relu(self.fc1(x))
        # Second layer computation
        x = F.relu(self.fc2(x))
        # Third layer computation
        # All output values should be positive
        x = torch.abs(self.fc3(x))
        return x

In [112]:
# Initialize model, loss function, and optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#Generate a neural network model
model = NeuralNet().to(device)
# Define the cross-entropy loss
criterion = nn.CrossEntropyLoss()
# Define the Adam Optimizer, choosing the learning rate as 0.001.
optimizer = optim.Adam(model.parameters(), lr = 0.001)

Demonstrates the least loss with batch size = 1 but the epochs take longer

In [115]:
# Training 
# Define number of epochs. An epoch is one complete pass through the entire training dataset
epochs=10
batchSize = 64
# Set training mode
model.train()
for epoch in range(epochs):
    # Initialize training loss
    total_loss = 0
    for day, open, high, low, close, adjClose in train_loader: # Get input and output in an iteration 
        #print("Input:", day)
        #print("open:", open)
        # Send input and output to device (GPU or CPU)
        outputPrices = {
            0: open,
            1: high,
            2: low,
            3: close,
            4: adjClose
        }
        day = day.to(device)
        for i in range(len(outputPrices)):
            outputPrices[i] = outputPrices[i].to(device)
        #Clean gradient storage
        optimizer.zero_grad()
        #Forward pass to get output
        output = model(day)
        #print("Output:", output)
        
        # Combine prices into a single tensor matching format of output tensor
        prices = []
        for i in range(len(output)):
            prices.append(outputPrices[i % 5][i // 5])
        #print(prices)

        #Calculate loss
        #print(len(prices) == len(output))
        loss = criterion(output, torch.tensor(prices))

        #Backward loss to get gradients
        loss.backward()
        #Perform one gradient descent update
        optimizer.step()
        #Accumulate loss
        total_loss += loss.item()
    #Print average loss for each epoch.
    print(f"Epoch {epoch + 1}, Loss: {total_loss / len(train_loader):.4f}")

Epoch 1, Loss: 41652.0459
Epoch 2, Loss: 41597.8513
Epoch 3, Loss: 41596.6364
Epoch 4, Loss: 41607.3515
Epoch 5, Loss: 41843.9425
Epoch 6, Loss: 41936.7692
Epoch 7, Loss: 41659.0249
Epoch 8, Loss: 41577.0756
Epoch 9, Loss: 41576.8615
Epoch 10, Loss: 41606.4973
