In [1]:
import numpy as np
import pandas as pd
import torch.nn as nn
import torch
import math

In [2]:
# Load the three training feature tables (position, time, numerical) from CSV.
# The files are read in the order listed and bound to the matching names.
position_data, time_data, numerical_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        "Data/Split1/Train/PositionFeatures.csv",
        "Data/Split1/Train/TimeFeatures.csv",
        "Data/Split1/Train/Features.csv",
    )
)

# spherical position embedding
def encode_position(latitude, longitude):
    """Convert a latitude/longitude pair (in degrees) to unit-sphere coordinates.

    Args:
        latitude: Latitude in degrees (scalar).
        longitude: Longitude in degrees (scalar).

    Returns:
        Tuple ``(x, y, z)`` of Cartesian coordinates on the unit sphere, where
        ``z`` depends only on the latitude.
    """
    phi = math.radians(latitude)
    lam = math.radians(longitude)
    # The latitude cosine scales both horizontal components.
    cos_phi = math.cos(phi)
    return cos_phi * math.cos(lam), cos_phi * math.sin(lam), math.sin(phi)

# extract features from data

# Unit-sphere (x, y, z) coordinates for every latitude/longitude row.
position_features = pd.DataFrame(
    position_data.apply(
        lambda rec: encode_position(rec["latitude"], rec["longitude"]), axis=1
    ).tolist(),
    columns=["x", "y", "z"],
)

# Cyclical sin/cos encodings for month, day and hour; year and AM_PM are
# carried over unchanged.
time_features = pd.DataFrame(
    {
        "month_sin": np.sin(2 * np.pi * time_data["month"] / 12),
        "month_cos": np.cos(2 * np.pi * time_data["month"] / 12),
        "day_sin": np.sin(2 * np.pi * time_data["day"] / 31),
        "day_cos": np.cos(2 * np.pi * time_data["day"] / 31),
        "hour_sin": np.sin(2 * np.pi * time_data["hour"] / 24),
        "hour_cos": np.cos(2 * np.pi * time_data["hour"] / 24),
        "year": time_data["year"],
        "AM_PM": time_data["AM_PM"],
    }
)

# Every remaining column except the station identifier is a numerical feature.
numerical_features = numerical_data.drop(columns=['station_id'])


# create embeddings using a linear layer
# Each feature group is projected to 256 dimensions by its own freshly
# initialised linear layer (created in position -> time -> numerical order).
position_embedding_layer = nn.Linear(
    in_features=len(position_features.columns), out_features=256
)
position_embedding = position_embedding_layer(
    torch.tensor(position_features.to_numpy(), dtype=torch.float32)
)

time_embedding_layer = nn.Linear(
    in_features=len(time_features.columns), out_features=256
)
time_embedding = time_embedding_layer(
    torch.tensor(time_features.to_numpy(), dtype=torch.float32)
)

numerical_embedding_layer = nn.Linear(
    in_features=len(numerical_features.columns), out_features=256
)
numerical_embedding = numerical_embedding_layer(
    torch.tensor(numerical_features.to_numpy(), dtype=torch.float32)
)

# add embeddings together to get input
X_train = position_embedding + time_embedding + numerical_embedding
X_train.shape

torch.Size([118957, 256])

In [3]:
# Read the training targets and discard the station identifier column.
output_values = pd.read_csv("Data/Split1/Train/Values.csv").drop(columns=['station_id'])

# Targets as a float32 tensor; the cell displays its shape.
y_train = torch.tensor(output_values.to_numpy(), dtype=torch.float32)
y_train.shape

torch.Size([118957, 1])

In [None]:
class SpatiotemporalEnconder(nn.Module):
    """Embed position, time and numerical columns of a DataFrame into one vector per row.

    The input frame must contain ``latitude``/``longitude`` (degrees), the time
    columns ``month``, ``day``, ``hour``, ``year`` and ``AM_PM`` (all numeric),
    and exactly seven further numeric columns. Each group is projected to
    ``model_dim`` by its own linear layer and the three projections are summed.

    Args:
        model_dim: Width of the output embedding.
    """

    # Columns consumed by the position and time encoders; every other column
    # of the input frame is treated as a numerical feature.
    POSITION_COLUMNS = ('latitude', 'longitude')
    TIME_COLUMNS = ('month', 'day', 'hour', 'year', 'AM_PM')

    def __init__(self, model_dim):
        # nn.Module must be initialised before sub-modules can be assigned.
        super().__init__()
        self.model_dim = model_dim
        self.position_embedding_layer = nn.Linear(3, model_dim)
        self.time_embedding_layer = nn.Linear(8, model_dim)
        self.numerical_embedding_layer = nn.Linear(7, model_dim)

    @staticmethod
    def encode_coordinates(latitude, longitude):
        """Map latitude/longitude in degrees to unit-sphere ``(x, y, z)``.

        Uses NumPy ufuncs so both scalars and arrays are accepted.
        """
        lat_rad = np.radians(latitude)
        lon_rad = np.radians(longitude)
        x = np.cos(lat_rad) * np.cos(lon_rad)
        y = np.cos(lat_rad) * np.sin(lon_rad)
        z = np.sin(lat_rad)
        return x, y, z

    @staticmethod
    def encode_time(month, day, hour, year, AM_PM):
        """Return sin/cos encodings of month, day and hour plus raw year and AM_PM.

        The result is the 8-tuple ``(month_sin, month_cos, day_sin, day_cos,
        hour_sin, hour_cos, year, AM_PM)``.
        """
        month_sin = np.sin(2 * np.pi * month / 12)
        month_cos = np.cos(2 * np.pi * month / 12)
        day_sin = np.sin(2 * np.pi * day / 31)
        day_cos = np.cos(2 * np.pi * day / 31)
        hour_sin = np.sin(2 * np.pi * hour / 24)
        hour_cos = np.cos(2 * np.pi * hour / 24)
        return month_sin, month_cos, day_sin, day_cos, hour_sin, hour_cos, year, AM_PM

    @staticmethod
    def _to_tensor(array, layer):
        """Convert ``array`` to a tensor matching the dtype and device of ``layer``."""
        return torch.as_tensor(
            np.asarray(array, dtype=np.float64),
            dtype=layer.weight.dtype,
            device=layer.weight.device,
        )

    def forward(self, X):
        """Embed every row of the DataFrame ``X``.

        Args:
            X: pandas DataFrame with the position, time and seven numerical columns.

        Returns:
            Tensor of shape ``(len(X), model_dim)``.

        Raises:
            ValueError: If required columns are missing or the number of
                remaining numerical columns is not the expected seven.
        """
        if any(col not in X.columns for col in self.POSITION_COLUMNS):
            raise ValueError("Input data must contain 'latitude' and 'longitude' columns")
        if any(col not in X.columns for col in self.TIME_COLUMNS):
            raise ValueError("Input data must contain 'month', 'day', 'hour', 'year', and 'AM_PM' columns")

        # Work on 1-D float arrays so the encoders return one array per feature.
        xyz = self.encode_coordinates(
            *(X[col].to_numpy(dtype=np.float64) for col in self.POSITION_COLUMNS)
        )
        position_embedding = self.position_embedding_layer(
            self._to_tensor(np.column_stack(xyz), self.position_embedding_layer)
        )

        time_parts = self.encode_time(
            *(X[col].to_numpy(dtype=np.float64) for col in self.TIME_COLUMNS)
        )
        time_embedding = self.time_embedding_layer(
            self._to_tensor(np.column_stack(time_parts), self.time_embedding_layer)
        )

        numerical = X.drop(columns=[*self.POSITION_COLUMNS, *self.TIME_COLUMNS])
        if numerical.shape[1] != self.numerical_embedding_layer.in_features:
            raise ValueError("Incorrect Numerical Features")
        numerical_embedding = self.numerical_embedding_layer(
            self._to_tensor(numerical.to_numpy(dtype=np.float64), self.numerical_embedding_layer)
        )

        return position_embedding + time_embedding + numerical_embedding

In [None]:
class MultiHeadAttention(nn.Module):
    """Attention over the head-chunks of each input row, followed by an output projection.

    Each row of a 2-D input is projected to ``model_dim`` and split into
    ``num_heads`` chunks of ``head_dim``. Scaled dot-product attention is then
    computed among those chunks (scores have shape
    ``(batch, num_heads, num_heads)``), so rows never attend to each other.

    Args:
        input_dim: Number of features in each input row.
        model_dim: Width of the query/key/value projections; must be divisible
            by ``num_heads``.
        output_dim: Number of features in each output row.
        num_heads: Number of chunks the projections are split into.

    Raises:
        ValueError: If ``model_dim`` is not divisible by ``num_heads``.
    """

    def __init__(self, input_dim, model_dim, output_dim, num_heads):
        super().__init__()

        if model_dim % num_heads != 0:
            raise ValueError(
                f"model_dim ({model_dim}) must be divisible by num_heads ({num_heads})"
            )

        # define model dimensions
        self.num_heads = num_heads
        self.head_dim = model_dim // num_heads

        # define query, key, and value weight matrices which will be trained
        self.W_Q = nn.Linear(input_dim, model_dim)
        self.W_K = nn.Linear(input_dim, model_dim)
        self.W_V = nn.Linear(input_dim, model_dim)

        # define output weight matrix which will be trained
        self.W_O = nn.Linear(model_dim, model_dim)

        # Final projection to output_dim; forward() relies on this layer.
        self.final_layer = nn.Linear(model_dim, output_dim)

    def forward(self, X):
        """Apply attention to ``X`` of shape ``(batch, input_dim)``.

        Returns:
            Tensor of shape ``(batch, output_dim)``.

        Raises:
            ValueError: If ``X`` is not 2-D.
        """
        if X.dim() != 2:
            raise ValueError(f"expected a 2-D input (batch, input_dim), got shape {tuple(X.shape)}")
        batch_size = X.shape[0]

        Q = self.W_Q(X).view(batch_size, self.num_heads, self.head_dim)
        K = self.W_K(X).view(batch_size, self.num_heads, self.head_dim)
        V = self.W_V(X).view(batch_size, self.num_heads, self.head_dim)

        # (batch, num_heads, num_heads) similarity between the chunks of each row
        attention_scores = torch.bmm(Q, K.transpose(1, 2)) / math.sqrt(self.head_dim)
        attention_weights = torch.softmax(attention_scores, dim=-1)

        # Re-join the chunks into one model_dim vector per row
        attention_output = torch.bmm(attention_weights, V).reshape(batch_size, -1)

        output = self.W_O(attention_output)

        return self.final_layer(output)

In [None]:
class LayerNormalization(nn.Module):
    """Layer normalisation with a learnable scale (``gamma``) and shift (``beta``).

    Args:
        model_dim: Size of the trailing dimension to normalise over, given as
            an int, or a sequence of ints to normalise over several trailing
            dimensions.
        eps: Constant added to the variance for numerical stability.
    """

    def __init__(self, model_dim, eps=1e-5):
        super().__init__()
        self.model_dim=model_dim
        self.eps=eps
        # Keep the normalised shape as a tuple so an int model_dim (one
        # trailing dimension) and a sequence are handled the same way.
        try:
            self._normalized_shape = tuple(model_dim)
        except TypeError:
            self._normalized_shape = (model_dim,)
        self.gamma = nn.Parameter(torch.ones(self._normalized_shape))
        self.beta =  nn.Parameter(torch.zeros(self._normalized_shape))

    def forward(self, X):
        """Normalise ``X`` over its trailing dimension(s) and apply gamma/beta."""
        # Trailing dimensions covered by the normalised shape, e.g. (-1,) for an int.
        dims = tuple(range(-len(self._normalized_shape), 0))
        mean = X.mean(dim=dims, keepdim=True)
        # Biased variance (mean of squared deviations).
        var = ((X - mean) ** 2).mean(dim=dims, keepdim=True)
        std = (var + self.eps).sqrt()
        y = (X - mean) / std
        out = self.gamma * y + self.beta
        return out

In [None]:
class PositionwiseFeedForward(nn.Module):
    """Two-layer feed-forward block: Linear -> ReLU -> Dropout -> Linear.

    Args:
        model_dim: Size of the input and output features.
        hidden: Size of the intermediate representation.
        drop_prob: Dropout probability applied after the activation.
    """

    def __init__(self, model_dim, hidden, drop_prob):
        super().__init__()
        # Expand to the hidden width, then project back to model_dim.
        self.linear1 = nn.Linear(in_features=model_dim, out_features=hidden)
        self.linear2 = nn.Linear(in_features=hidden, out_features=model_dim)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=drop_prob)

    def forward(self, x):
        """Return the block output; the last dimension stays ``model_dim``."""
        hidden_state = self.dropout(self.relu(self.linear1(x)))
        return self.linear2(hidden_state)


In [None]:
class EncoderLayer(nn.Module):
    """Attention and feed-forward sub-layers, each with dropout, residual add and layer norm.

    Args:
        model_dim: Feature width of the layer's input and output.
        ffn_hidden: Hidden width of the feed-forward sub-layer.
        num_heads: Number of attention heads.
        drop_prob: Dropout probability used after each sub-layer.
    """

    def __init__(self, model_dim, ffn_hidden, num_heads, drop_prob):
        super(EncoderLayer, self).__init__()
        # MultiHeadAttention takes (input_dim, model_dim, output_dim, num_heads);
        # all three widths are model_dim so the residual addition lines up.
        self.attention = MultiHeadAttention(
            input_dim=model_dim,
            model_dim=model_dim,
            output_dim=model_dim,
            num_heads=num_heads,
        )
        self.norm1 = LayerNormalization(model_dim=model_dim)
        self.dropout1 = nn.Dropout(p=drop_prob)
        self.ffn = PositionwiseFeedForward(model_dim=model_dim, hidden=ffn_hidden, drop_prob=drop_prob)
        self.norm2 = LayerNormalization(model_dim=model_dim)
        self.dropout2 = nn.Dropout(p=drop_prob)

    def forward(self, x, self_attention_mask=None):
        """Run the attention and feed-forward sub-layers on ``x``.

        Args:
            x: Input tensor whose last dimension is ``model_dim``.
            self_attention_mask: Must be ``None``; ``MultiHeadAttention.forward``
                accepts only the input tensor, so a mask cannot be applied.

        Returns:
            Tensor with the same shape as ``x``.

        Raises:
            NotImplementedError: If a mask is supplied.
        """
        if self_attention_mask is not None:
            # Fail loudly instead of silently ignoring a mask the caller relies on.
            raise NotImplementedError(
                "MultiHeadAttention does not support an attention mask"
            )

        # No in-place ops follow, so the residual can reference x directly.
        residual_x = x
        x = self.attention(x)
        x = self.dropout1(x)
        x = self.norm1(x + residual_x)

        residual_x = x
        x = self.ffn(x)
        x = self.dropout2(x)
        x = self.norm2(x + residual_x)
        return x

In [None]:
class Transformer(nn.Module):
    """Spatiotemporal encoder followed by stacked attention layers and a feed-forward head.

    Args:
        model_dim: Feature width used throughout the model.
        num_heads: Number of heads in every attention layer.
        num_layers: Number of stacked attention layers.
        feedforward_dim: Hidden width of the final feed-forward network.
    """

    def __init__(self, model_dim, num_heads, num_layers, feedforward_dim):
        super().__init__()

        # Assign the encoder directly: nn.Module() accepts no positional
        # arguments, and attribute assignment registers the sub-module.
        self.encoder = SpatiotemporalEnconder(model_dim)

        # Every attention layer maps model_dim -> model_dim so layers can be chained.
        self.layers = nn.ModuleList(
            MultiHeadAttention(model_dim, model_dim, model_dim, num_heads)
            for _ in range(num_layers)
        )
        self.feedforward = nn.Sequential(
            nn.Linear(model_dim, feedforward_dim),
            nn.ReLU(),
            nn.Linear(feedforward_dim, model_dim)
        )

    def forward(self, X):
        """Encode ``X``, apply each attention layer in order, then the feed-forward head."""
        X = self.encoder(X)

        for layer in self.layers:
            X = layer(X)

        X = self.feedforward(X)

        return X

In [1]:
# Work on detached copies of X_train and y_train so neither carries autograd history
X_train = X_train.detach().clone().requires_grad_(False)
y_train = y_train.detach().clone().requires_grad_(False)

# Replace NaN entries in the inputs and targets with zero
X_train.masked_fill_(torch.isnan(X_train), 0) # 0 nans
y_train.masked_fill_(torch.isnan(y_train), 0) # 1395 nans

# Hyperparameters
batch_size = 32
num_heads = 8
input_dim = X_train.shape[1]
model_dim = 512
output_dim = 1

# Model, loss function and optimizer
multi_head_attention = MultiHeadAttention(input_dim, model_dim, output_dim, num_heads)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(multi_head_attention.parameters(), lr=0.001)

# Train model: sequential (unshuffled) mini-batches, one optimizer step per batch
num_epochs = 100
for epoch in range(num_epochs):
    for start in range(0, X_train.shape[0], batch_size):
        stop = start + batch_size
        X_batch, y_batch = X_train[start:stop], y_train[start:stop]

        optimizer.zero_grad()
        y_pred = multi_head_attention(X_batch)

        loss = criterion(y_pred, y_batch)
        loss.backward()

        optimizer.step()

        # start advances in steps of batch_size (32), so this only fires at
        # offsets divisible by both 32 and 5000.
        if not start % 5000:
            print(f"Epoch {epoch}, Loss: {loss.item()}")

NameError: name 'X_train' is not defined