# Hybrid Model: Combining Neural Networks with Traditional ML

This notebook implements a hybrid model that combines neural networks with traditional machine learning approaches.

## 1. Import Required Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import warnings
warnings.filterwarnings('ignore')

%matplotlib inline

## 2. Load and Prepare Data

In [None]:
# Read the training and test splits from CSV files in the working directory.
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')

# Report both shapes, then preview the first rows of the training split.
# A single print call joined with newlines emits the same text as one call per line.
print(
    f"Training data shape: {train_data.shape}",
    f"Test data shape: {test_data.shape}",
    f"\nFirst few rows:",
    train_data.head(),
    sep="\n",
)

In [None]:
# Summarise the training frame: per-column dtypes first, then describe() statistics.
print(
    f"\nData types:\n{train_data.dtypes}",
    f"\nData statistics:\n{train_data.describe()}",
    sep="\n",
)

An initial check found no missing values, although the data appear somewhat warped.

In [None]:
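# TODO: Extract trajectory features from the last 0.1 seconds of data prior to ball release.
# This step is not implemented yet. Later cells use X_train, y_train, X_val, y_val,
# X_train_scaled, and X_val_scaled, which no other visible cell defines, so this
# cell must create them before the notebook can run top to bottom.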

## 3. Build Neural Network Component

In [1]:
class NeuralNetwork(nn.Module):
    """Feed-forward binary classifier with three hidden layers.

    Layer widths are input_dim -> 128 -> 64 -> 32 -> 1. Each hidden layer is
    followed by ReLU and dropout; the output passes through a sigmoid, so
    ``forward`` yields one value between 0 and 1 per input row.
    """

    def __init__(self, input_dim):
        """Build the layers.

        Args:
            input_dim: Number of input features per sample.
        """
        super().__init__()

        # Hidden stack; dropout probabilities are 0.3, 0.3 and 0.2.
        self.fc1 = nn.Linear(input_dim, 128)
        self.dropout1 = nn.Dropout(0.3)
        self.fc2 = nn.Linear(128, 64)
        self.dropout2 = nn.Dropout(0.3)
        self.fc3 = nn.Linear(64, 32)
        self.dropout3 = nn.Dropout(0.2)

        # Single-unit output layer.
        self.fc4 = nn.Linear(32, 1)

        # Activations shared by all layers.
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run a forward pass.

        Args:
            x: Tensor whose last dimension equals ``input_dim``.

        Returns:
            Tensor with a last dimension of 1 holding sigmoid outputs.
        """
        hidden_stages = (
            (self.fc1, self.dropout1),
            (self.fc2, self.dropout2),
            (self.fc3, self.dropout3),
        )
        # Every hidden stage is Linear -> ReLU -> Dropout.
        for linear, dropout in hidden_stages:
            x = dropout(self.relu(linear(x)))
        return self.sigmoid(self.fc4(x))

# Instantiate the standalone network with one input unit per column of
# X_train_scaled, then print its layer summary.
# NOTE(review): no cell visible in this notebook trains nn_model before it is
# used for predictions in section 7 — confirm whether a training step is missing.
nn_model = NeuralNetwork(input_dim=X_train_scaled.shape[1])
print(nn_model)

NameError: name 'nn' is not defined

## 4. Build Traditional ML Component

In [None]:
# Hyperparameters for the forest: 100 trees, depth capped at 15.
# The fixed random_state keeps the forest's own randomness repeatable.
rf_params = {
    'n_estimators': 100,
    'max_depth': 15,
    'random_state': 42,
    'n_jobs': -1,
}
rf_model = RandomForestClassifier(**rf_params)

# Fit on X_train / y_train (presumably the unscaled training split — confirm).
rf_model.fit(X_train, y_train)

print("Random Forest model trained successfully")

## 5. Combine Models into Hybrid Architecture

In [None]:
class HybridModel(nn.Module):
    """Two-stage network: a learned feature branch feeding a dense meta-learner.

    The branch maps the input to 16 ReLU features. Those features are
    concatenated with the raw input and passed through a small dense head
    that ends in a sigmoid, giving one value between 0 and 1 per row.

    NOTE(review): nothing in this class consumes output from a traditional ML
    model such as the random forest — confirm whether that was intended.
    """

    def __init__(self, input_dim):
        """Create the branch and meta-learner layers.

        Args:
            input_dim: Number of input features per sample.
        """
        super().__init__()

        # Feature branch: input_dim -> 64 -> 32 -> 16.
        self.nn_fc1 = nn.Linear(input_dim, 64)
        self.nn_dropout1 = nn.Dropout(0.3)
        self.nn_fc2 = nn.Linear(64, 32)
        self.nn_dropout2 = nn.Dropout(0.2)
        self.nn_fc3 = nn.Linear(32, 16)

        # Meta-learner over [raw input, branch features]:
        # (input_dim + 16) -> 32 -> 16 -> 1.
        self.meta_fc1 = nn.Linear(input_dim + 16, 32)
        self.meta_dropout1 = nn.Dropout(0.2)
        self.meta_fc2 = nn.Linear(32, 16)
        self.meta_fc3 = nn.Linear(16, 1)

        # Activations shared by both stages.
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run a forward pass.

        Args:
            x: 2-D tensor of shape (batch, input_dim); the concatenation below
                is along dim=1.

        Returns:
            Tensor of shape (batch, 1) holding sigmoid outputs.
        """
        # Feature branch: two Linear -> ReLU -> Dropout stages, then Linear -> ReLU.
        branch = self.nn_dropout1(self.relu(self.nn_fc1(x)))
        branch = self.nn_dropout2(self.relu(self.nn_fc2(branch)))
        learned_features = self.relu(self.nn_fc3(branch))

        # Put the raw inputs and the 16 learned features side by side.
        stacked = torch.cat([x, learned_features], dim=1)

        # Meta-learner head.
        hidden = self.meta_dropout1(self.relu(self.meta_fc1(stacked)))
        hidden = self.relu(self.meta_fc2(hidden))
        return self.sigmoid(self.meta_fc3(hidden))

# Instantiate the hybrid network with one input unit per column of
# X_train_scaled, then print its layer summary.
hybrid_model = HybridModel(input_dim=X_train_scaled.shape[1])
print(hybrid_model)

## 6. Train Hybrid Model

In [None]:
# Convert the scaled features and the labels to float tensors.
# Labels are reshaped to column vectors (N, 1) to line up with the model's
# single-unit output.
X_train_tensor = torch.FloatTensor(X_train_scaled)
y_train_tensor = torch.FloatTensor(y_train.values).reshape(-1, 1)
X_val_tensor = torch.FloatTensor(X_val_scaled)
y_val_tensor = torch.FloatTensor(y_val.values).reshape(-1, 1)

# Mini-batches of 32 training samples, reshuffled every epoch.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
)

# Run on the GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Move the model first so the optimizer is built from the on-device parameters.
hybrid_model = hybrid_model.to(device)
optimizer = optim.Adam(hybrid_model.parameters(), lr=0.001)
criterion = nn.BCELoss()  # binary cross-entropy on the sigmoid outputs

# Epoch budget and per-epoch history, filled in by the training loop.
num_epochs = 50
train_losses, val_losses = [], []
train_accuracies, val_accuracies = [], []

# Train for num_epochs passes over train_loader. After each pass, record the
# loss and accuracy on the training data and on the full validation tensors.
for epoch in range(num_epochs):
    # Training phase: train() enables dropout; weights update once per mini-batch.
    hybrid_model.train()
    train_loss = 0.0      # running SUM of per-sample losses for this epoch
    train_correct = 0
    train_total = 0

    for batch_X, batch_y in train_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)

        optimizer.zero_grad()
        outputs = hybrid_model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

        # criterion (nn.BCELoss with its default mean reduction) returns the
        # batch mean. Weight it by the batch size so a smaller final batch is
        # not counted as heavily as a full one.
        train_loss += loss.item() * batch_y.size(0)
        # A prediction is correct when thresholding at 0.5 matches the label.
        train_correct += ((outputs > 0.5) == batch_y).sum().item()
        train_total += batch_y.size(0)

    # Validation phase: eval() disables dropout, no_grad() skips gradient
    # tracking. The whole validation set goes through in a single forward pass.
    hybrid_model.eval()
    with torch.no_grad():
        X_val_device = X_val_tensor.to(device)
        y_val_device = y_val_tensor.to(device)

        val_outputs = hybrid_model(X_val_device)
        val_loss = criterion(val_outputs, y_val_device)
        val_correct = ((val_outputs > 0.5) == y_val_device).sum().item()
        val_total = y_val_device.size(0)

    # Per-sample mean training loss, directly comparable with the validation loss.
    train_losses.append(train_loss / train_total)
    val_losses.append(val_loss.item())
    train_accuracies.append(train_correct / train_total)
    val_accuracies.append(val_correct / val_total)

    # Print a progress line every 10th epoch.
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_losses[-1]:.4f}, Val Loss: {val_losses[-1]:.4f}, '
              f'Train Acc: {train_accuracies[-1]:.4f}, Val Acc: {val_accuracies[-1]:.4f}')

In [None]:
# Plot the per-epoch training history: loss on the left, accuracy on the right.
fig, axes = plt.subplots(1, 2, figsize=(14, 4))

# One row per panel:
# (training series, validation series, training label, validation label, y-axis label, title)
history_panels = [
    (train_losses, val_losses, 'Training Loss', 'Validation Loss', 'Loss', 'Model Loss'),
    (train_accuracies, val_accuracies, 'Training Accuracy', 'Validation Accuracy', 'Accuracy', 'Model Accuracy'),
]

for panel, (train_series, val_series, train_label, val_label, y_label, panel_title) in zip(axes, history_panels):
    panel.plot(train_series, label=train_label)
    panel.plot(val_series, label=val_label)
    panel.set_xlabel('Epoch')
    panel.set_ylabel(y_label)
    panel.set_title(panel_title)
    panel.legend()
    panel.grid(True)

plt.tight_layout()
plt.show()

## 7. Evaluate Model Performance

In [None]:
# Get predictions from all three models on the validation split.
#
# Switch BOTH networks to inference mode first. An nn.Module starts in training
# mode, so without nn_model.eval() its dropout layers stay active and its
# predictions change from run to run.
# NOTE(review): no cell visible in this notebook trains nn_model, so its
# predictions here would come from randomly initialised weights — confirm
# whether a training step for it is missing.
hybrid_model.eval()
nn_model = nn_model.to(device)
nn_model.eval()

with torch.no_grad():
    X_val_device = X_val_tensor.to(device)

    # Threshold the sigmoid outputs at 0.5 to obtain flat arrays of 0/1 labels.
    nn_pred_tensor = nn_model(X_val_device)
    nn_pred = (nn_pred_tensor > 0.5).cpu().numpy().astype(int).flatten()

    hybrid_pred_tensor = hybrid_model(X_val_device)
    hybrid_pred = (hybrid_pred_tensor > 0.5).cpu().numpy().astype(int).flatten()

# The random forest predicts from X_val rather than the scaled tensor, matching
# the X_train it was fitted on.
rf_pred = rf_model.predict(X_val)

# Calculate metrics
def evaluate_model(y_true, y_pred, model_name):
    """Print accuracy, precision, recall and F1 for one model's predictions.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.
        model_name: Name shown in the report header.

    Returns:
        None. The report is written to standard output, each score to four decimals.
    """
    print(f"\n{model_name} Performance:")

    # Row label (padded so the scores line up) paired with its scoring function.
    scorers = (
        ("Accuracy:  ", accuracy_score),
        ("Precision: ", precision_score),
        ("Recall:    ", recall_score),
        ("F1-Score:  ", f1_score),
    )
    for row_label, scorer in scorers:
        print(f"{row_label}{scorer(y_true, y_pred):.4f}")

# Print the validation metrics for each model in turn.
for val_predictions, model_label in (
    (nn_pred, "Neural Network"),
    (rf_pred, "Random Forest"),
    (hybrid_pred, "Hybrid Model"),
):
    evaluate_model(y_val, val_predictions, model_label)

In [None]:
# Draw one confusion-matrix heatmap per model, side by side.
fig, axes = plt.subplots(1, 3, figsize=(15, 4))

# (predictions, panel title) for each model, in display order.
models = [
    (nn_pred, "Neural Network"),
    (rf_pred, "Random Forest"),
    (hybrid_pred, "Hybrid Model"),
]

for panel, (predictions, title) in zip(axes, models):
    cm = confusion_matrix(y_val, predictions)
    # annot=True with fmt='d' writes the integer count inside every cell.
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=panel)
    panel.set_title(title)
    panel.set_ylabel('True Label')
    panel.set_xlabel('Predicted Label')

plt.tight_layout()
plt.show()

## 8. Compare Hybrid vs. Individual Models

In [None]:
# Build a table with one row per model and one column per validation metric.
model_predictions = {
    'Neural Network': nn_pred,
    'Random Forest': rf_pred,
    'Hybrid Model': hybrid_pred,
}
metric_functions = {
    'Accuracy': accuracy_score,
    'Precision': precision_score,
    'Recall': recall_score,
    'F1-Score': f1_score,
}

# Dict insertion order fixes both the row order (models) and the column order (metrics).
comparison_df = pd.DataFrame({
    'Model': list(model_predictions),
    **{
        metric_name: [score(y_val, pred) for pred in model_predictions.values()]
        for metric_name, score in metric_functions.items()
    },
})

print("\nModel Comparison:")
print(comparison_df.to_string(index=False))

In [None]:
# Grouped bar chart: one group per model, one bar per metric.
fig, ax = plt.subplots(figsize=(12, 6))

x = np.arange(len(comparison_df))
width = 0.2

# Each metric's bars are shifted by a multiple of the bar width so the four
# bars of a group sit side by side, centred on the model's tick.
for bar_offset, metric in ((-1.5, 'Accuracy'), (-0.5, 'Precision'), (0.5, 'Recall'), (1.5, 'F1-Score')):
    ax.bar(x + bar_offset * width, comparison_df[metric], width, label=metric)

ax.set_xlabel('Models')
ax.set_ylabel('Score')
ax.set_title('Model Performance Comparison')
ax.set_xticks(x)
ax.set_xticklabels(comparison_df['Model'])
ax.legend()
ax.set_ylim([0, 1])  # the y-axis is fixed to the range 0 to 1
ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.show()