In [26]:
import torch
import torch.nn as nn # defining our neural network
import torch.optim as optim # training our neural network
from torch.utils.data import DataLoader, Dataset # loading data in batches
import pandas as pd

In [20]:
# our neural network class is a subclass of nn.Module
# this handles a lot of the boilerplate code for us
# including parameter initialization, etc.
class FFN(nn.Module):
    """A small feed-forward network: Linear -> ReLU -> Linear."""

    def __init__(self, input_size, hidden_size, output_size):
        """Create the two linear layers and the activation between them.

        Args:
            input_size: number of features in each input row
            hidden_size: width of the hidden layer
            output_size: number of values produced per input row
        """
        super(FFN, self).__init__()  # nn.Module must be initialised before layers are attached
        # Layers are attached in the order they are applied in forward().
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Apply fc1, then the ReLU activation, then fc2, and return the result."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

In [21]:
# Smoke test: push a single random row through a freshly initialised network.
model = FFN(10, 5, 1)  # input_size=10, hidden_size=5, output_size=1
# One sample with 10 features; every entry is drawn from a normal distribution.
random_input = torch.randn((1, 10))
output = model(random_input)
print(output)

tensor([[0.1931]], grad_fn=<AddmmBackward0>)


In [22]:
class SP500Dataset(Dataset):
    """Dataset that serves (features, target) pairs taken from a DataFrame."""

    def __init__(self, data, target_column):
        """Split the frame into a feature array and a target array.

        Args:
            data (pd.DataFrame): frame holding the feature columns and the target column
            target_column (str): name of the column in `data` to use as the target
        """
        self.target_name = target_column

        # Every column except the target is treated as a feature.
        feature_frame = data.drop(columns=[target_column])
        target_series = data[target_column]

        # Store the underlying values rather than the pandas objects,
        # so __getitem__ can index them by position.
        self.X = feature_frame.values
        self.y = target_series.values

    def __len__(self):
        """Number of samples: one per entry of the target array."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return the (features, target) pair at `idx`, both as float32 tensors."""
        features = torch.tensor(self.X[idx], dtype=torch.float32)
        target = torch.tensor(self.y[idx], dtype=torch.float32)
        return features, target

In [23]:
import pandas as pd

# Read the competition's training set.
train_df = pd.read_csv("/kaggle/input/hull-tactical-market-prediction/train.csv")

# --- Features and target ---
# Ten input columns (V1-V9 plus D1), matching the FFN(input_size=10) built earlier.
feature_cols = ['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'D1']

# The CSV names the target 'forward_returns'; the cleaned frame exposes it as 'target'.
target_col_original = 'forward_returns'
target_col_new = 'target'

# Columns carried over from the raw frame.
all_cols_to_keep = feature_cols + [target_col_original]

# Select -> rename the target -> drop rows containing any NaN -> renumber the index.
df = (
    train_df[all_cols_to_keep]
    .copy()
    .rename(columns={target_col_original: target_col_new})
    .dropna()
    .reset_index(drop=True)
)

print(f"Data loaded. Using {len(feature_cols)} features.")
print(f"Original rows: {len(train_df)}, Usable rows (after dropna): {len(df)}")

Data loaded. Using 10 features.
Original rows: 8990, Usable rows (after dropna): 4451


In [None]:
# NOTE(review): this cell repeats the load/clean step of the cell directly above
# (same file, same columns, same cleaning); consider deleting one of the two.

# Read the competition's training set.
train_df = pd.read_csv("/kaggle/input/hull-tactical-market-prediction/train.csv")

# --- Features and target ---
# Ten features (V1..V9 followed by D1), one per input of the FFN(input_size=10) model.
feature_cols = [f'V{i}' for i in range(1, 10)] + ['D1']

# Name of the target in the CSV, and the name it gets in the cleaned frame.
target_col_original = 'forward_returns'
target_col_new = 'target'

# Restrict the raw frame to the features plus the target.
all_cols_to_keep = [*feature_cols, target_col_original]
df = train_df.loc[:, all_cols_to_keep].copy()

# Expose the target under its new name.
df = df.rename(columns={target_col_original: target_col_new})

# Simple missing-data policy: discard every row that has at least one NaN.
df = df.dropna(axis=0, how='any').reset_index(drop=True)

print(f"Data loaded. Using {len(feature_cols)} features.")
print(f"Original rows: {len(train_df)}, Usable rows (after dropna): {len(df)}")

In [27]:
# Wrap the cleaned dataframe; the target column is identified by its name (a string).
dataset = SP500Dataset(data=df, target_column='target')

# Iterable that yields shuffled mini-batches of (inputs, targets).
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# The network's input width must equal the number of feature columns.
model = FFN(input_size=len(feature_cols), hidden_size=5, output_size=1)

# Adam: a variant of stochastic gradient descent.
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Mean squared error, the usual loss for regression.
criterion = nn.MSELoss()

for epoch in range(10):  # loop over the dataset multiple times
    running_loss = 0.0  # sum of per-sample losses seen in this epoch
    samples_seen = 0

    for inputs, targets in dataloader:
        optimizer.zero_grad()  # clear gradients left over from the previous step

        outputs = model(inputs)  # forward pass, shape (batch, 1)
        # squeeze(-1) removes only the trailing size-1 dimension. A bare squeeze()
        # would also remove the batch dimension when a batch holds a single sample,
        # leaving a 0-d prediction that no longer matches the (1,) target.
        loss = criterion(outputs.squeeze(-1), targets)
        loss.backward()  # backward pass (compute gradients)
        optimizer.step()  # update parameters

        # MSELoss averages over the batch, so weight by batch size to get
        # a correct mean over the whole epoch (the last batch may be smaller).
        batch_count = inputs.size(0)
        running_loss += loss.item() * batch_count
        samples_seen += batch_count

    # Report the mean loss over the epoch rather than the last mini-batch's loss,
    # which is noisy and would be undefined if the loader produced no batches.
    epoch_loss = running_loss / max(samples_seen, 1)
    print(f'Epoch {epoch+1}, Loss: {epoch_loss}')

TypeError: SP500Dataset.__init__() got an unexpected keyword argument 'target_column'