In [None]:
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

import pandas as pd
import numpy as np

# Load the training data
train = pd.read_csv('train_new.csv')

# Debugging: Display the first few rows of the training data
print(train.head())


# Handle the missing values with a simple first-pass imputation:
# the most frequent value for text (object) columns, the mean for everything else.
for column in train.columns:
    if train[column].dtype == 'object':
        modes = train[column].mode()
        if modes.empty:
            # Every value is missing, so there is no mode to impute with.
            continue
        fill_value = modes[0]
    else:
        fill_value = train[column].mean()
    # Assign the result back instead of `train[column].fillna(..., inplace=True)`:
    # the in-place call on a selected column is chained assignment, which pandas
    # warns about and which leaves `train` untouched under Copy-on-Write.
    train[column] = train[column].fillna(fill_value)

# Date columns are kept out of the label encoding below and expanded into
# year / month / day features instead (a simple preprocessing method to try first).
datetime_cols = ['CropTillageDate', 'RcNursEstDate', 'Harv_date', 'Threshing_date']

# Encoding categorical variables
label_encoders = {}
for col in train.select_dtypes(include=['object']).columns:
    # Skip the ID column, and skip the date columns: they are still strings at
    # this point, and label-encoding them would turn the dates into arbitrary
    # integer codes that pd.to_datetime then reads as nanoseconds since the epoch.
    if col == 'ID' or col in datetime_cols:
        continue
    le = LabelEncoder()
    train[col] = le.fit_transform(train[col])
    label_encoders[col] = le  # kept so the same mapping can be reused later

# Parse each date column and extract year, month and day as separate features
for col in datetime_cols:
    parsed = pd.to_datetime(train[col])
    train[col + '_year'] = parsed.dt.year
    train[col + '_month'] = parsed.dt.month
    train[col + '_day'] = parsed.dt.day

# Drop original datetime columns, only use new columns
train = train.drop(columns=datetime_cols)

# Preparing the data for modeling
# Training Features
X = train.drop(['ID', 'Yield'], axis=1)
# Target Feature
y = train['Yield']

# Splitting the dataset into training and validation sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalizing numerical features.
# The scaler is fitted on the training rows only and then applied to the
# held-out rows, so min/max statistics of the validation data do not leak
# into training. Held-out values may therefore fall slightly outside [0, 1].
scaler = MinMaxScaler()
numeric_columns = X.select_dtypes(include=[np.number]).columns
# Work on explicit copies so the assignments below do not write into views of X.
X_train = X_train.copy()
X_test = X_test.copy()
X_train[numeric_columns] = scaler.fit_transform(X_train[numeric_columns])
X_test[numeric_columns] = scaler.transform(X_test[numeric_columns])

# Convert the pandas objects to PyTorch tensors.
# torch.tensor cannot consume a DataFrame/Series directly, so go through a
# float32 NumPy array first; targets are reshaped to a column vector (N, 1)
# to match the (N, 1) output of the regression models.
X_train_tensor = torch.tensor(np.asarray(X_train, dtype=np.float32), dtype=torch.float32)
y_train_tensor = torch.tensor(np.asarray(y_train, dtype=np.float32), dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(np.asarray(X_test, dtype=np.float32), dtype=torch.float32)
y_test_tensor = torch.tensor(np.asarray(y_test, dtype=np.float32), dtype=torch.float32).view(-1, 1)

# Create DataLoader for both training and testing sets
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Only the training batches are shuffled; evaluation order stays fixed.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

According to the H2O AutoML pipeline, the MLP model performs much better than we expected, so this notebook concentrates on deep learning methods.

Besides a simple MLP, there are several other deep learning methods that can be tested.

In [None]:
import torch
import torch.nn as nn

We will start with a simple MLP, which serves as a baseline and a naive example.

In [None]:
class BasicFNN(nn.Module):
    """Baseline feed-forward regressor: input_dim -> 128 -> 64 -> 1.

    Args:
        input_dim: number of input features per sample.
    """

    def __init__(self, input_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 1)

    def forward(self, x):
        """Apply the two ReLU-activated hidden layers, then the linear output layer."""
        hidden = x
        for layer in (self.fc1, self.fc2):
            hidden = self.relu(layer(hidden))
        return self.fc3(hidden)


In [None]:
class SimpleMLP(nn.Module):
    """
    A super simple MLP to test the power of deep learning
    Args:
        input_dim: int, the dimension of the input, aka the number of features
        hidden_dim: int, the dimension of the hidden layer
    Returns:
        output: the output of the MLP, aka the prediction, one value per sample
    """
    def __init__(self,
                 input_dim: int,
                 hidden_dim: int,
                 ):
        super(SimpleMLP, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        # Output layer gets its own name so it no longer overwrites the hidden layer fc2.
        self.fc3 = nn.Linear(hidden_dim, 1)
        self.dropout = nn.Dropout(0.1)
        self.activation = nn.ReLU()

    def forward(self, x):
        """Run two hidden layers (linear -> ReLU -> dropout) followed by the linear output layer."""
        x = self.dropout(self.activation(self.fc1(x)))
        x = self.dropout(self.activation(self.fc2(x)))
        return self.fc3(x)

In [None]:
from torch.nn import TransformerEncoder, TransformerEncoderLayer

class TabularTransformer(nn.Module):
    """Transformer-encoder regressor for flat feature vectors.

    Each sample is projected to ``d_model`` dimensions, wrapped as a sequence of
    length one, passed through the encoder stack and mapped to a single value.

    Args:
        input_dim: number of input features per sample.
        d_model: width of the encoder representation.
        nhead: number of attention heads in each encoder layer.
        num_encoder_layers: number of stacked encoder layers.
        dim_feedforward: hidden width of each encoder layer's feed-forward part.
    """

    def __init__(self, input_dim, d_model=64, nhead=2, num_encoder_layers=2, dim_feedforward=128):
        super(TabularTransformer, self).__init__()
        # batch_first=True: forward() builds a (batch, 1, d_model) tensor. With the
        # default (seq, batch, feature) layout the batch axis would be read as the
        # sequence axis and samples of one mini-batch would attend to each other.
        # The layer stays registered as an attribute so state_dict keys are unchanged.
        self.encoder_layer = TransformerEncoderLayer(
            d_model, nhead, dim_feedforward, batch_first=True
        )
        self.transformer_encoder = TransformerEncoder(self.encoder_layer, num_encoder_layers)
        self.input_fc = nn.Linear(input_dim, d_model)
        self.output_fc = nn.Linear(d_model, 1)

    def forward(self, x):
        """Map a (batch, input_dim) tensor to (batch, 1) predictions."""
        x = self.input_fc(x)
        x = x.unsqueeze(1)  # Add sequence dimension -> (batch, 1, d_model)
        x = self.transformer_encoder(x)
        x = x.squeeze(1)  # Remove sequence dimension -> (batch, d_model)
        x = self.output_fc(x)
        return x

In [None]:
class UNetTabular(nn.Module):
    """Small encoder/decoder MLP with one skip connection.

    Layer widths are input_dim -> 128 -> 64 -> 128 -> 1; the first 128-wide
    activation is added to the decoder's 128-wide activation before the output layer.

    Args:
        input_dim: number of input features per sample.
    """

    def __init__(self, input_dim):
        super(UNetTabular, self).__init__()
        self.down1 = nn.Sequential(nn.Linear(input_dim, 128), nn.ReLU(inplace=True))
        self.down2 = nn.Sequential(nn.Linear(128, 64), nn.ReLU(inplace=True))
        self.up1 = nn.Sequential(nn.Linear(64, 128), nn.ReLU(inplace=True))
        self.up2 = nn.Linear(128, 1)

    def forward(self, x):
        """Return one prediction per row of ``x``."""
        x1 = self.down1(x)
        x2 = self.down2(x1)
        x = self.up1(x2)
        # Skip connection. Use an out-of-place add: `x += x1` would overwrite the
        # ReLU output that autograd saved for the backward pass, and backward()
        # then fails with an "modified by an inplace operation" error.
        x = x + x1
        x = self.up2(x)
        return x

In [None]:
import torch.nn.functional as F

class AttentionBasedFNN(nn.Module):
    """Feed-forward regressor with a learned softmax re-weighting of the inputs.

    A linear layer produces one score per feature; the scores are softmax-normalised
    along dim 1 and multiplied element-wise with the input before the
    input_dim -> 128 -> 64 -> 1 network is applied.

    Args:
        input_dim: number of input features per sample.
    """

    def __init__(self, input_dim):
        super().__init__()
        self.attention_fc = nn.Linear(input_dim, input_dim)
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 1)

    def forward(self, x):
        """Re-weight the features, then run the three-layer network."""
        scores = self.attention_fc(x)
        weighted = x * scores.softmax(dim=1)
        hidden = F.relu(self.fc1(weighted))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [None]:
class GRUForTabular(nn.Module):
    """GRU-based regressor that predicts from the last time step's hidden output.

    Args:
        input_dim: number of input features per time step.
        hidden_dim: size of the GRU hidden state.
        num_layers: number of stacked GRU layers.
    """

    def __init__(self, input_dim, hidden_dim=64, num_layers=2):
        super(GRUForTabular, self).__init__()
        self.gru = nn.GRU(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Accept (batch, seq, input_dim) or flat (batch, input_dim) input; return (batch, 1)."""
        if x.dim() == 2:
            # Flat tabular rows: treat each row as a sequence of length one so the
            # batch_first GRU and the last-step indexing below both apply.
            x = x.unsqueeze(1)
        x, _ = self.gru(x)
        x = x[:, -1, :]  # output at the final time step
        x = self.fc(x)
        return x

In [None]:
from torch.nn import MSELoss
from torch.optim import Adam

# Mean-squared-error objective for the regression target
criterion = MSELoss()

# One input unit per feature column of the training matrix
input_dim = X_train.shape[1]
model = TabularTransformer(input_dim)

# Adam over all model parameters with a learning rate of 1e-3
optimizer = Adam(params=model.parameters(), lr=1e-3)

In [None]:
num_epochs = 100

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0  # sum of per-batch mean losses, weighted by batch size
    seen_samples = 0
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        batch_size = inputs.size(0)
        running_loss += loss.item() * batch_size
        seen_samples += batch_size

    # Report the mean loss over the whole epoch rather than the last mini-batch
    # only, which is noisy and undefined when the loader yields no batches.
    epoch_loss = running_loss / max(seen_samples, 1)
    print(f'Epoch {epoch+1}, Loss: {epoch_loss}')

In [None]:
model.eval()
with torch.no_grad():
    # Accumulate the squared error over the held-out set; this is a regression
    # task, so the error is summarised as MSE / RMSE rather than accuracy.
    squared_error_sum = 0.0
    total = 0
    for inputs, targets in test_loader:
        outputs = model(inputs)
        squared_error_sum += ((outputs - targets) ** 2).sum().item()
        total += targets.numel()

# max(..., 1) guards against an empty loader.
test_mse = squared_error_sum / max(total, 1)
print(f'Test MSE: {test_mse:.4f}, Test RMSE: {test_mse ** 0.5:.4f}')

In [None]:
# Persist only the learned parameters (the state dict), not the module object.
checkpoint_path = 'tabular_transformer_model.pth'
torch.save(model.state_dict(), checkpoint_path)

# Rebuild an identically configured network and restore the saved parameters.
restored_state = torch.load(checkpoint_path)
model = TabularTransformer(input_dim=input_dim)
model.load_state_dict(restored_state)
# Switch to inference mode (disables dropout in the encoder layers).
model.eval()