<a href="https://colab.research.google.com/github/aidanbolinger/MachineLearning/blob/main/FinalProject1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install yfinance



In [None]:
!pip install torch-geometric

Collecting torch-geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/63.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━[0m [32m61.4/63.1 kB[0m [31m16.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.6.1-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch-geometric
Successfully installed torch-geometric-2.6.1


In [None]:
import yfinance as yf

#Download historical data for Apple
# auto_adjust is passed explicitly: the library default has changed between
# yfinance releases, and the recorded output of this cell (no "Adj Close"
# column) corresponds to adjusted prices.
data = yf.download("AAPL", start="2022-01-01", end="2023-01-01", auto_adjust=True)

#Show first few rows (bare last expression -> rich table rendering)
data.head()

[*********************100%***********************]  1 of 1 completed

Price            Close        High         Low        Open     Volume
Ticker            AAPL        AAPL        AAPL        AAPL       AAPL
Date                                                                 
2022-01-03  178.879929  179.734977  174.653889  174.771820  104487900
2022-01-04  176.609634  179.793920  176.039607  179.489254   99310400
2022-01-05  171.911865  177.071580  171.636682  176.521213   94537600
2022-01-06  169.042053  172.285305  168.688244  169.730012   96904000
2022-01-07  169.209122  171.145244  168.088727  169.916741   86709100





In [None]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Baseline: linear regression that predicts the next day's close from
# today's close and the 5-day / 10-day moving averages of the close.
MA_SHORT = 5           # short moving-average window (trading days)
MA_LONG = 10           # long moving-average window (trading days)
TRAIN_FRACTION = 0.8   # chronological train/test split

#Download AAPL close prices
# auto_adjust is explicit because the yfinance default differs between releases.
data_yf = yf.download("AAPL", start="2023-01-01", end="2023-12-31", auto_adjust=True)['Close']
data_yf = data_yf.ffill()

#Generate features
close_prices = data_yf.values
# Work on a flat 1-D view so the code does not depend on whether yfinance
# returned a Series (N,) or a single-column DataFrame (N, 1).
prices_1d = np.asarray(close_prices, dtype=float).reshape(-1)
if len(prices_1d) <= MA_LONG + 1:
    raise ValueError(
        f"Need more than {MA_LONG + 1} closing prices to build features, "
        f"got {len(prices_1d)} (did the download fail?)"
    )

features = []
targets = []

for i in range(MA_LONG, len(prices_1d) - 1):
    long_window = prices_1d[i - MA_LONG + 1:i + 1]   # the 10 closes ending at day i
    last_close = prices_1d[i]                        # close of day i (latest known price)
    ma5 = long_window[-MA_SHORT:].mean()             # mean of the 5 closes ending at day i
    ma10 = long_window.mean()
    features.append([last_close, ma5, ma10])
    targets.append(prices_1d[i + 1])                 # label: close of day i+1

#Convert to Numpy arrays
X = np.array(features)
y = np.array(targets)

#Train/test split (chronological: first 80% train, last 20% test)
split = int(TRAIN_FRACTION * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

#Train linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

#Make predictions
y_pred = model.predict(X_test)

#Evaluate model
mse = mean_squared_error(y_test, y_pred)
print(f"Baseline Linear Regression MSE: {mse}")

[*********************100%***********************]  1 of 1 completed

Baseline Linear Regression MSE: 3.6450027078363085





In [None]:
import torch
import torch.nn as nn
import torch_geometric
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data
import numpy as np

#Download AAPL data
# auto_adjust is explicit because the yfinance default differs between releases.
data_yf = yf.download("AAPL", start="2023-01-01", end="2023-12-31", auto_adjust=True)['Close']

#Fill missing data
data_yf = data_yf.ffill()

#Create features
close_prices = data_yf.values
# Flat 1-D view of the closes, independent of whether yfinance returned a
# Series (N,) or a single-column DataFrame (N, 1).
prices_1d = np.asarray(close_prices, dtype=float).reshape(-1)
num_days = len(prices_1d)
if num_days < 3:
    raise ValueError(f"Need at least 3 closing prices to build the graph, got {num_days}")

# One feature row per day: [close, 5-day mean, 10-day mean]; the windows are
# truncated at the start of the series where fewer past days exist.
features = []
for i in range(num_days):
    last_close = prices_1d[i]
    ma5 = prices_1d[max(0, i-4):i+1].mean()
    ma10 = prices_1d[max(0, i-9):i+1].mean()
    features.append([last_close, ma5, ma10])

#Convert to PyTorch tensor
features = np.array(features)
X = torch.tensor(features, dtype=torch.float) #[num_days, num_features]

#Predict next day's price
# Labels are built with an explicit [num_nodes, 1] shape so they always match
# the model output; the last day has no next-day label, so its row is dropped.
y = torch.tensor(prices_1d[1:], dtype=torch.float).reshape(-1, 1)
X = X[:-1] #Drop last feature row

#Create edges
#Connect each remaining day to the next day (only nodes that still exist in X)
num_nodes = X.shape[0]
source_nodes = torch.arange(num_nodes - 1, dtype=torch.long)
edge_index = torch.stack([source_nodes, source_nodes + 1]) #[2, num_nodes-1]

#Create graph data object
# NOTE: this rebinds the notebook-level name `data`, which the first download
# cell bound to the 2022 price DataFrame.
data = Data(x=X, edge_index=edge_index, y=y)

# Define the GNN model using GCNConv layers
class StockGNN(nn.Module):
    """Two GCNConv layers followed by a linear head producing one value per node.

    Args:
        in_channels: number of input features per node (default 3).
        hidden_channels: output width of the first graph convolution (default 64).
        embed_channels: output width of the second graph convolution (default 32).

    The defaults reproduce the original fixed 3 -> 64 -> 32 -> 1 architecture,
    so ``StockGNN()`` builds the same network as before.
    """

    def __init__(self, in_channels=3, hidden_channels=64, embed_channels=32):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, embed_channels)
        self.fc = nn.Linear(embed_channels, 1)  # Output is a single stock price prediction

    def forward(self, data):
        """Run the network on a graph object exposing ``x`` and ``edge_index``.

        Returns the output of the final linear layer (one column per node row).
        """
        x, edge_index = data.x, data.edge_index
        x = torch.relu(self.conv1(x, edge_index))
        x = torch.relu(self.conv2(x, edge_index))
        return self.fc(x)

# Initialize model and optimizer
torch.manual_seed(0)  # fixed seed so weight initialisation (and the losses below) are reproducible
model = StockGNN()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_fn = nn.MSELoss() #Mean Squared Error loss

# Chronological 80/20 split over the nodes (days), the same fraction the
# baseline cell uses. Previously the model was trained and "tested" on the
# same nodes, so the reported MSE was a training error.
node_targets = data.y.view(-1, 1)            # explicit [num_nodes, 1] to match the model output
gnn_split = int(0.8 * node_targets.shape[0])

# Training loop
for epoch in range(100):
    model.train()
    optimizer.zero_grad()
    output = model(data)  # Forward pass over the whole graph
    # Only the training nodes contribute to the loss
    loss = loss_fn(output[:gnn_split], node_targets[:gnn_split])
    loss.backward()
    optimizer.step()

    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item()}")

#Test the model and calculate MSE on the held-out (last 20%) nodes
model.eval()
with torch.no_grad():
    output = model(data)
    train_loss = loss_fn(output[:gnn_split], node_targets[:gnn_split]).item()
    test_loss = loss_fn(output[gnn_split:], node_targets[gnn_split:]).item()
    print(f"Train MSE: {train_loss}")
    print(f"Test MSE: {test_loss}")


[*********************100%***********************]  1 of 1 completed


Epoch 0, Loss: 27500.40625
Epoch 10, Loss: 1945.3922119140625
Epoch 20, Loss: 489.9573059082031
Epoch 30, Loss: 18.554386138916016
Epoch 40, Loss: 45.010807037353516
Epoch 50, Loss: 65.03643798828125
Epoch 60, Loss: 33.467220306396484
Epoch 70, Loss: 18.138145446777344
Epoch 80, Loss: 20.181913375854492
Epoch 90, Loss: 18.457653045654297
MSE: 18.22766876220703


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

#Use only the 'Close' column
# The prices are loaded here instead of being read from the notebook-level
# `data` variable: by the time this cell runs in a top-to-bottom execution,
# the GNN cell has rebound `data` to a graph object, so `data['Close']` no
# longer refers to the 2022 price table.
gan_prices = yf.download("AAPL", start="2022-01-01", end="2023-01-01", auto_adjust=True)
close_prices = gan_prices['Close'].values

#Normalize prices between -1 and 1
min_price = close_prices.min()
max_price = close_prices.max()
if max_price == min_price:
    raise ValueError("Cannot min-max normalize a constant price series")
normalized_prices = 2 * (close_prices - min_price) / (max_price - min_price) - 1

#Convert to PyTorch tensor
# Explicit [num_days, 1] shape: one scalar price per sample, matching the
# discriminator's input width.
real_data = torch.tensor(normalized_prices, dtype=torch.float32).reshape(-1, 1)
print(real_data[:5])

# Define the Generator Network
class Generator(nn.Module):
    """Fully connected generator: z_dim -> 128 -> 64 -> output_dim.

    ReLU follows each hidden layer; the final Tanh keeps every output
    value inside [-1, 1].
    """

    def __init__(self, z_dim, output_dim):
        super().__init__()
        widths = (z_dim, 128, 64)
        stack = []
        # Hidden layers are created in order, each followed by a ReLU
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack += [nn.Linear(fan_in, fan_out), nn.ReLU()]
        # Output layer: stock price prediction squashed to [-1, 1]
        stack += [nn.Linear(widths[-1], output_dim), nn.Tanh()]
        self.fc = nn.Sequential(*stack)

    def forward(self, z):
        """Map a batch of noise vectors ``z`` to generated samples."""
        generated = self.fc(z)
        return generated

# Define the Discriminator Network
class Discriminator(nn.Module):
    """Fully connected discriminator: input_dim -> 64 -> 32 -> 1.

    LeakyReLU(0.2) follows each hidden layer; the final Sigmoid turns the
    single output into a probability (real or fake).
    """

    def __init__(self, input_dim):
        super().__init__()
        widths = (input_dim, 64, 32)
        stack = []
        # Hidden layers are created in order, each followed by a LeakyReLU
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack += [nn.Linear(fan_in, fan_out), nn.LeakyReLU(0.2)]
        # Single-unit head squashed to a probability
        stack += [nn.Linear(widths[-1], 1), nn.Sigmoid()]
        self.fc = nn.Sequential(*stack)

    def forward(self, x):
        """Return the probability assigned to each sample in ``x``."""
        probability = self.fc(x)
        return probability

# Hyperparameters
z_dim = 100  # Random noise input for the generator
stock_price_dim = 1  # We predict one stock price at a time

# Fixed seed so weight initialisation and the sampled noise/batches are
# reproducible from one run of the notebook to the next.
torch.manual_seed(0)

# Initialize networks
generator = Generator(z_dim, stock_price_dim)
discriminator = Discriminator(stock_price_dim)

# Optimizers (Adam with beta1 = 0.5 for both networks)
lr = 0.0002
optimizer_g = optim.Adam(generator.parameters(), lr=lr, betas=(0.5, 0.999))
optimizer_d = optim.Adam(discriminator.parameters(), lr=lr, betas=(0.5, 0.999))

# Loss function: binary cross-entropy on the discriminator's probability output
criterion = nn.BCELoss()

# Training loop
num_epochs = 10000
batch_size = 32

# Targets for the discriminator output, shaped [batch_size, 1] like that
# output, so no squeeze() is needed to make the shapes agree.
real_label = torch.ones(batch_size, 1)
fake_label = torch.zeros(batch_size, 1)

for epoch in range(num_epochs):
    # Sample a random mini-batch (with replacement) from the real, normalized prices
    indices = torch.randint(0, real_data.size(0), (batch_size,))
    batch_real_data = real_data[indices].reshape(batch_size, -1)

    # Train Discriminator
    optimizer_d.zero_grad()

    # Real data should be classified as 1
    output_real = discriminator(batch_real_data)
    loss_real = criterion(output_real, real_label)

    # Fake data (generated by the generator) should be classified as 0.
    # detach() stops this backward pass from running through the generator,
    # which is not being updated in this step.
    noise = torch.randn(batch_size, z_dim)
    fake_data = generator(noise).detach()
    output_fake = discriminator(fake_data)
    loss_fake = criterion(output_fake, fake_label)

    # Total loss for discriminator
    loss_d = loss_real + loss_fake
    loss_d.backward()
    optimizer_d.step()

    # Train Generator
    optimizer_g.zero_grad()

    # Try to fool the discriminator: fresh fake samples are scored against
    # the "real" target (1), and the gradient flows back into the generator.
    noise = torch.randn(batch_size, z_dim)
    fake_data = generator(noise)
    output_fake = discriminator(fake_data)
    loss_g = criterion(output_fake, real_label)

    loss_g.backward()
    optimizer_g.step()

    # Print progress
    if epoch % 1000 == 0:
        print(f"Epoch [{epoch}/{num_epochs}], D Loss: {loss_d.item()}, G Loss: {loss_g.item()}")


tensor([[[1.0000]],

        [[0.9164]],

        [[0.7433]],

        [[0.6376]],

        [[0.6437]]])
Epoch [0/10000], D Loss: 1.3979310989379883, G Loss: 0.6897317171096802
Epoch [1000/10000], D Loss: 1.392025351524353, G Loss: 0.6841793656349182
Epoch [2000/10000], D Loss: 1.3347010612487793, G Loss: 0.7792994976043701
Epoch [3000/10000], D Loss: 1.314685344696045, G Loss: 0.6944489479064941
Epoch [4000/10000], D Loss: 1.388582706451416, G Loss: 0.7003824710845947
Epoch [5000/10000], D Loss: 1.3884835243225098, G Loss: 0.6905910968780518
Epoch [6000/10000], D Loss: 1.3933680057525635, G Loss: 0.697860598564148
Epoch [7000/10000], D Loss: 1.3819947242736816, G Loss: 0.702691912651062
Epoch [8000/10000], D Loss: 1.3860965967178345, G Loss: 0.6958096623420715
Epoch [9000/10000], D Loss: 1.3859188556671143, G Loss: 0.6935076713562012
