In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader
import optuna

device = torch.device('mps' if torch.backends.mps.is_available() else 'cpu')

# Load Data
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

train_df.head()
train_df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15000 entries, 0 to 14999
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   id                     15000 non-null  int64  
 1   Hardness               15000 non-null  float64
 2   allelectrons_Total     15000 non-null  float64
 3   density_Total          15000 non-null  float64
 4   allelectrons_Average   15000 non-null  float64
 5   val_e_Average          15000 non-null  float64
 6   atomicweight_Average   15000 non-null  float64
 7   ionenergy_Average      15000 non-null  float64
 8   el_neg_chi_Average     15000 non-null  float64
 9   R_vdw_element_Average  15000 non-null  float64
 10  R_cov_element_Average  15000 non-null  float64
 11  zaratio_Average        15000 non-null  float64
 12  density_Average        15000 non-null  float64
dtypes: float64(12), int64(1)
memory usage: 1.5 MB
/kaggle/input/round-2-nexus-recruitment/sample_submission.cs

In [2]:
# Feature Engineering
def create_features(df):
    df['density_ratio'] = df['density_Total'] / df['density_Average']
    df['electron_density_ratio'] = df['allelectrons_Total'] / df['density_Total']
    df['weight_density_ratio'] = df['atomicweight_Average'] / df['density_Average']
    df['ionenergy_density'] = df['ionenergy_Average'] / df['density_Average']
    df['log_density'] = np.log1p(df['density_Total'])
    df['log_weight'] = np.log1p(df['atomicweight_Average'])
    df['energy_density_product'] = df['ionenergy_Average'] * df['density_Total']
    df['electron_weight_ratio'] = df['allelectrons_Total'] / df['atomicweight_Average']
    df['total_energy'] = df['allelectrons_Total'] * df['ionenergy_Average']
    return df

train_df = create_features(train_df)
test_df = create_features(test_df)

# Handle infinite values
train_df.replace([np.inf, -np.inf], np.nan, inplace=True)
test_df.replace([np.inf, -np.inf], np.nan, inplace=True)

# Fill missing values
train_df.fillna(train_df.mean(), inplace=True)
test_df.fillna(test_df.mean(), inplace=True)

# Prepare Data
X = train_df.drop(['Hardness', 'id'], axis=1)
y = train_df['Hardness']
X_test = test_df.drop(['id'], axis=1)

In [3]:
# Polynomial Features
poly = PolynomialFeatures(degree=2, interaction_only=False, include_bias=False)
X_poly = poly.fit_transform(X)
X_test_poly = poly.transform(X_test)

# Scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_poly)
X_test_scaled = scaler.transform(X_test_poly)


In [4]:
# Split Data
X_train, X_val, y_train, y_val = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Convert to Tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1).to(device)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(device)
y_val_tensor = torch.tensor(y_val.values, dtype=torch.float32).view(-1, 1).to(device)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32).to(device)

In [5]:
# Objective Function for Optuna
def objective(trial):
    # Hyperparameters
    hidden_size = trial.suggest_int('hidden_size', 128, 1024)
    num_layers = trial.suggest_int('num_layers', 1, 5)
    dropout_rate = trial.suggest_float('dropout_rate', 0.0, 0.5)
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
    weight_decay = trial.suggest_loguniform('weight_decay', 1e-6, 1e-2)
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])
    activation_name = trial.suggest_categorical('activation', ['ReLU', 'LeakyReLU', 'ELU', 'GELU'])
    
    # Activation Function
    activation = getattr(nn, activation_name)()
    
    # Model Definition
    class NeuralNet(nn.Module):
        def __init__(self, input_dim):
            super(NeuralNet, self).__init__()
            layers = []
            in_features = input_dim
            for _ in range(num_layers):
                layers.append(nn.Linear(in_features, hidden_size))
                layers.append(activation)
                layers.append(nn.Dropout(dropout_rate))
                in_features = hidden_size
            layers.append(nn.Linear(hidden_size, 1))
            self.model = nn.Sequential(*layers)
    
        def forward(self, x):
            return self.model(x)
    
    # Model and Optimizer
    model = NeuralNet(input_dim=X_train.shape[1])
    model.to(device)
    
    # Loss and Optimizer
    criterion = nn.L1Loss()
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    
    # Data Loaders
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    
    # Training Loop
    num_epochs = 50
    for epoch in range(num_epochs):
        model.train()
        for X_batch, y_batch in train_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
        
        # Validation
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch = X_batch.to(device)
                y_batch = y_batch.to(device)
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                val_loss += loss.item() * X_batch.size(0)
        val_loss /= len(val_dataset)
        
        # Report
        trial.report(val_loss, epoch)
        
        # Prune
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()
    
    return val_loss

In [6]:
# Run Optuna Study
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)

print('Best trial:')
trial = study.best_trial

print('  Validation Loss: {:.5f}'.format(trial.value))
print('  Best hyperparameters: {}'.format(trial.params))


[I 2024-11-03 17:12:47,792] A new study created in memory with name: no-name-a295f35a-cdc7-41dc-9bb0-d56c27ae8cc9
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
  weight_decay = trial.suggest_loguniform('weight_decay', 1e-6, 1e-2)
[I 2024-11-03 17:13:09,513] Trial 0 finished with value: 1.050882129351298 and parameters: {'hidden_size': 376, 'num_layers': 1, 'dropout_rate': 0.18881415550890318, 'learning_rate': 0.0034674783138713694, 'weight_decay': 2.3764610150734924e-05, 'batch_size': 64, 'activation': 'ReLU'}. Best is trial 0 with value: 1.050882129351298.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
  weight_decay = trial.suggest_loguniform('weight_decay', 1e-6, 1e-2)
[I 2024-11-03 17:13:27,091] Trial 1 finished with value: 1.131344741821289 and parameters: {'hidden_size': 159, 'num_layers': 5, 'dropout_rate': 0.18008498622670782, 'learning_rate': 1.263928208008661e-05, 'weight_decay': 1.2396321775875055e-06, 'batch_size': 128, 'ac

Best trial:
  Validation Loss: 0.98136
  Best hyperparameters: {'hidden_size': 890, 'num_layers': 2, 'dropout_rate': 0.12161424428266182, 'learning_rate': 4.2052587410959206e-05, 'weight_decay': 2.8358340925720687e-05, 'batch_size': 32, 'activation': 'ReLU'}


In [7]:
# Train Best Model
best_params = trial.params
activation = getattr(nn, best_params['activation'])()

class NeuralNet(nn.Module):
    def __init__(self, input_dim):
        super(NeuralNet, self).__init__()
        layers = []
        in_features = input_dim
        for _ in range(best_params['num_layers']):
            layers.append(nn.Linear(in_features, best_params['hidden_size']))
            layers.append(activation)
            layers.append(nn.Dropout(best_params['dropout_rate']))
            in_features = best_params['hidden_size']
        layers.append(nn.Linear(best_params['hidden_size'], 1))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)

model = NeuralNet(input_dim=X_train.shape[1])
model.to(device)

criterion = nn.L1Loss()
optimizer = optim.AdamW(model.parameters(),
                        lr=best_params['learning_rate'],
                        weight_decay=best_params['weight_decay'])
batch_size = best_params['batch_size']

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    for X_batch, y_batch in train_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

In [8]:
# Prediction
model.eval()
with torch.no_grad():
    X_test_device = X_test_tensor.to(device)
    predictions = model(X_test_device)
    final_preds = predictions.cpu().numpy().squeeze()

# Submission
submission = pd.DataFrame({
    'id': test_df['id'],
    'Hardness': final_preds
})
submission.to_csv('submission.csv', index=False)