In [99]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

default_dtype = torch.float32
device = "cuda" if torch.cuda.is_available() else "cpu"
torch.set_default_device(device)

In [None]:
def cyclic_sort(x_input):
    """
    Sorts each row so the maximum panel length is always first.
    This resolves cyclic symmetry.
    """
    x = x_input.copy()
    for i, x_row in enumerate(x):
        max_index = np.argmax(x_row)
        x[i] = np.roll(x_row, shift=-max_index)
    return x

def mirror_sort(x_input):
    """
    Ensures the largest neighboring panel is in a consistent position.
    This resolves mirror symmetry.
    """
    x = x_input.copy()
    for i, x_row in enumerate(x):
        right_panel = x_row[1]
        left_panel = x_row[-1]
        if left_panel > right_panel:
            x[i, 1:] = np.flip(x_row[1:])
    return x

def compute_aspect_ratios(X):
    """
    Normalize panel lengths by the longest panel.
    This prevents a single large panel from dominating the model's learning.
    """
    aspect_ratios = np.zeros_like(X)
    for i in range(X.shape[0]):
        aspect_ratios[i] = X[i] / np.max(X[i]) 
    return aspect_ratios




def disambiguate_symmetries(x_input):
    """
    Applies both cyclic and mirror sorting to fully resolve symmetries.
    """
    x = np.sort(x_input, axis=1)[:, ::-1].copy()
    x = mirror_sort(x)
    x = compute_aspect_ratios(x)
    return x


In [None]:
def load_fence_data(file_paths, input_dim=9):

    dfs = []
    for file in file_paths:
        df = pd.read_csv(file)
        panel_cols = [str(i) for i in range(input_dim)]
        
        for col in panel_cols:
            if col not in df.columns:
                df[col] = 0.0
        
        dfs.append(df)

    data = pd.concat(dfs, ignore_index=True)
    
    X = data[panel_cols].values.astype(np.float32)
    X = disambiguate_symmetries(X)  # Apply symmetry resolution

    y = data['CE'].values.astype(np.float32)
    return X, y

train_files = [
    "data/kaggle_train_5_fences.csv",
    "data/kaggle_train_7_fences.csv",
    "data/kaggle_train_9_fences.csv"
]
X_train, y_train = load_fence_data(train_files)

X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)
print("Train shape:", X_train.shape, "Validation shape:", X_val.shape)


Train shape: (14250, 9) Validation shape: (750, 9)


In [146]:
class FenceDataset(Dataset):
    """
    PyTorch Dataset wrapping processed panel length data.
    """
    def __init__(self, X, y, dtype=torch.float32):
        self.X = torch.tensor(X, dtype=dtype)
        self.y = torch.tensor(y, dtype=dtype).unsqueeze(1)  # Reshape to (N,1)

    def __len__(self):
        return self.X.size(0)

    def __getitem__(self, index):
        return self.X[index], self.y[index]

# Create DataLoaders
train_dataset = FenceDataset(X_train, y_train)
val_dataset = FenceDataset(X_val, y_val)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)


In [147]:
class NeuralNetwork(nn.Module):
    def __init__(self, input_dim=9):
        super().__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(input_dim, 32, bias=False),
            nn.ReLU(),
            nn.Linear(32, 64, bias=False),
            nn.ReLU(),
            nn.Linear(64, 128, bias=False),
            nn.ReLU(),
            nn.Linear(128, 64, bias=False),
            nn.ReLU(),
            nn.Linear(64, 64, bias=False),
            nn.ReLU(),
            nn.Linear(64, 32, bias=False),
            nn.ReLU(),
            nn.Linear(32, 16, bias=False),
            nn.ReLU(),
            nn.Linear(16, 1, bias=False),
            nn.Sigmoid()  # Binary classification
        )

    def forward(self, x):
        return self.linear_relu_stack(x)


In [148]:
def train_model(model, train_loader, val_loader, num_epochs, lr):

    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.BCELoss()
    model.train()

    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = 0
        total = 0

        for panels_batch, labels_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(panels_batch)
            loss = criterion(outputs, labels_batch)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * panels_batch.size(0)
            correct += ((outputs >= 0.5).float() == labels_batch).sum().item()
            total += labels_batch.size(0)

        train_loss = running_loss / total
        train_acc = correct / total

        print(f"Epoch {epoch+1}/{num_epochs} - Loss: {train_loss:.4f}, Accuracy: {train_acc:.2%}")

    return model

# Initialize and train the model
model = NeuralNetwork(input_dim=9).to(torch.device("cpu"))
model = train_model(model, train_loader, val_loader, num_epochs=100, lr=0.001)


Epoch 1/100 - Loss: 0.1643, Accuracy: 94.26%
Epoch 2/100 - Loss: 0.0547, Accuracy: 97.55%
Epoch 3/100 - Loss: 0.0394, Accuracy: 98.40%
Epoch 4/100 - Loss: 0.0415, Accuracy: 98.18%
Epoch 5/100 - Loss: 0.0388, Accuracy: 98.37%
Epoch 6/100 - Loss: 0.0332, Accuracy: 98.55%
Epoch 7/100 - Loss: 0.0423, Accuracy: 98.19%
Epoch 8/100 - Loss: 0.0321, Accuracy: 98.62%
Epoch 9/100 - Loss: 0.0364, Accuracy: 98.53%
Epoch 10/100 - Loss: 0.0425, Accuracy: 98.29%
Epoch 11/100 - Loss: 0.0329, Accuracy: 98.55%
Epoch 12/100 - Loss: 0.0325, Accuracy: 98.62%
Epoch 13/100 - Loss: 0.0355, Accuracy: 98.59%
Epoch 14/100 - Loss: 0.0272, Accuracy: 98.84%
Epoch 15/100 - Loss: 0.0254, Accuracy: 98.93%
Epoch 16/100 - Loss: 0.0306, Accuracy: 98.70%
Epoch 17/100 - Loss: 0.0346, Accuracy: 98.51%
Epoch 18/100 - Loss: 0.0313, Accuracy: 98.68%
Epoch 19/100 - Loss: 0.0248, Accuracy: 98.96%
Epoch 20/100 - Loss: 0.0284, Accuracy: 98.81%
Epoch 21/100 - Loss: 0.0302, Accuracy: 98.64%
Epoch 22/100 - Loss: 0.0279, Accuracy: 98.8

In [149]:
def find_misclassified_examples(model, dataloader):
    """
    Identifies misclassified samples from the validation set.
    """
    model.eval()
    errors = []

    with torch.no_grad():
        for panels_batch, labels_batch in dataloader:
            outputs = model(panels_batch)
            preds = (outputs >= 0.5).float()
            incorrect_indices = (preds != labels_batch).squeeze().nonzero(as_tuple=True)[0]
            
            for i in incorrect_indices:
                errors.append((panels_batch[i].cpu().numpy(), labels_batch[i].item(), preds[i].item()))
    
    return errors


# Example usage:
misclassified_samples = find_misclassified_examples(model, val_loader)

print(f"Found {len(misclassified_samples)} misclassified samples.")
for i, (panels, true_label, pred_label) in enumerate(misclassified_samples[:-1]):
    print(f"Example {i+1}:")
    print(f"  Panels: {panels}")
    print(f"  True Label: {true_label}, Predicted Label: {pred_label}")


Found 3 misclassified samples.
Example 1:
  Panels: [1.         0.4164022  0.2788603  0.21470524 0.19842508 0.17381363
 0.17157032 0.09322661 0.00333265]
  True Label: 1.0, Predicted Label: 0.0
Example 2:
  Panels: [1.         0.94227237 0.18488751 0.11356357 0.00462162 0.
 0.         0.         0.        ]
  True Label: 0.0, Predicted Label: 1.0


In [150]:
def load_test_data(file_path, input_dim=9):
    df = pd.read_csv(file_path)
    panel_cols = [str(i) for i in range(input_dim)]
    # Replace NaN values with 0 in the panel columns
    df[panel_cols] = df[panel_cols].fillna(0)
    
    X = df[panel_cols].values.astype(np.float32)
    X = disambiguate_symmetries(X)
    test_ids = df['id'].values
    return torch.tensor(X), test_ids

# Example usage:
X_test, test_ids = load_test_data("data/kaggle_hidden_test_fences.csv", input_dim=9)
print("Test data shape:", X_test.shape)


Test data shape: torch.Size([45000, 9])


In [151]:
model.eval()
with torch.no_grad():
    outputs_test = model(X_test)
    preds_test = (outputs_test >= 0.5).float().cpu().numpy().flatten()

submission_df = pd.DataFrame({'id': test_ids, 'prediction': preds_test.astype(int)})
submission_df.to_csv("submissiont1.csv", index=False)
print("Submission file saved as 'submissiont1.csv'")

Submission file saved as 'submissiont1.csv'
