In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
import h5py
import timm
from torch.optim import Adam
from tqdm import tqdm

class SequenceDataset(Dataset):
    """Map-style dataset backed by an HDF5 file with ``images`` and ``labels``.

    The file is opened briefly in ``__init__`` only to read the number of
    samples. The long-lived read handle is created lazily on the first
    ``__getitem__`` call, in whichever process makes that call. This keeps an
    open h5py handle from being shared with (or pickled into) DataLoader
    worker processes, which h5py does not support.

    Attributes:
        h5_path: Path of the HDF5 file passed to the constructor.
        h5_file: Open ``h5py.File`` handle, or ``None`` until the first item
            is read (and again after ``close()``).
        images: ``images`` dataset of the open file, or ``None`` while closed.
            Per the original author's note, its shape is (N, 32, 32).
        labels: ``labels`` dataset of the open file, or ``None`` while closed.
            Per the original author's note, its shape is (N,).
    """

    def __init__(self, h5_file):
        """Record the file path and cache the sample count.

        Args:
            h5_file: Path to an HDF5 file containing ``images`` and ``labels``.
        """
        self.h5_path = h5_file
        self.h5_file = None
        self.images = None
        self.labels = None
        # Context manager guarantees this probing handle is closed again.
        with h5py.File(h5_file, "r") as probe:
            self._length = len(probe["labels"])

    def _ensure_open(self):
        """Open the HDF5 file in the current process if it is not open yet."""
        if self.h5_file is None:
            self.h5_file = h5py.File(self.h5_path, "r")
            self.images = self.h5_file["images"]
            self.labels = self.h5_file["labels"]

    def close(self):
        """Close the HDF5 handle, if any; it is reopened on the next access."""
        handle = self.h5_file
        self.h5_file = None
        self.images = None
        self.labels = None
        if handle is not None:
            handle.close()

    def __getstate__(self):
        """Return picklable state with the (unpicklable) h5py objects dropped."""
        state = self.__dict__.copy()
        state["h5_file"] = None
        state["images"] = None
        state["labels"] = None
        return state

    def __len__(self):
        """Return the number of samples (length of the ``labels`` dataset)."""
        return self._length

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        The image is converted to ``float32`` and gets a leading channel
        axis; the label is converted to ``long``.
        """
        self._ensure_open()
        image = self.images[idx]
        # unsqueeze(0) adds the channel dimension: (H, W) -> (1, H, W).
        image = torch.tensor(image, dtype=torch.float32).unsqueeze(0)
        label = torch.tensor(self.labels[idx], dtype=torch.long)
        return image, label

class BEiT(nn.Module):
    """Small BEiT classifier created through ``timm.create_model``.

    Starts from the ``beit_base_patch16_224`` entry point and passes keyword
    arguments requesting a much smaller network: patch size 1, embedding
    width 64, 4 blocks, 2 attention heads, MLP ratio 2.0, drop rate 0.1, and
    no pretrained weights.

    Args:
        img_size: Input image side length forwarded to timm.
        in_channels: Number of input channels (forwarded as ``in_chans``).
        num_classes: Number of output classes.
    """

    def __init__(self, img_size=32, in_channels=1, num_classes=2):
        # Zero-argument super(): the previous explicit form named an
        # undefined class and raised NameError on construction.
        super().__init__()
        self.model = timm.create_model(
            'beit_base_patch16_224',
            img_size=img_size,
            patch_size=1,
            in_chans=in_channels,
            num_classes=num_classes,
            embed_dim=64,
            depth=4,
            num_heads=2,
            mlp_ratio=2.0,
            drop_rate=0.1,
            pretrained=False
        )

    def forward(self, x):
        """Run the wrapped timm model on ``x`` and return its output."""
        return self.model(x)

    @property
    def config(self):
        """Return a summary dict read back from the instantiated model.

        Keys: ``name``, ``embed_dim``, ``depth`` (number of blocks),
        ``num_heads`` (taken from the first block's attention module) and
        ``num_params`` (total element count over all parameters).
        """
        return {
            'name': 'TinyBEiT-p1',
            'embed_dim': self.model.embed_dim,
            'depth': len(self.model.blocks),
            'num_heads': self.model.blocks[0].attn.num_heads,
            'num_params': sum(p.numel() for p in self.parameters())
        }


# The rest of the file instantiates the model as ``TinyBEiT``; keep that name
# working without renaming the class.
TinyBEiT = BEiT

# Stage 1: build the model and train/evaluate it on the "easydata" file.

# Load dataset
h5_file_path = "/kaggle/input/easydata/merged_data (3).h5"
dataset = SequenceDataset(h5_file_path)

# Split dataset 80/20. random_split is not seeded here, so the partition
# changes from run to run.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# Create DataLoader with multiple workers
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=4, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=4, pin_memory=True)

# Initialize the model. The class in this file is named `BEiT`; the name
# `TinyBEiT` used here before is not defined by the class statement.
model = BEiT(img_size=32, in_channels=1, num_classes=2)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Define loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=1e-4)

# Use mixed precision training for faster execution. AMP is CUDA-only here,
# so switch it off explicitly when the script falls back to the CPU.
use_amp = device.type == "cuda"
scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

# Training loop
epochs = 5
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}"):
        images, labels = images.to(device), labels.to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass with mixed precision
        with torch.cuda.amp.autocast(enabled=use_amp):
            outputs = model(images)
            loss = criterion(outputs, labels)

        # Backward pass and optimization with scaler
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        total_loss += loss.item()

    # Mean of the per-batch losses over this epoch.
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss / len(train_loader):.4f}")

# Evaluation loop: fraction of held-out samples whose arg-max logit matches
# the label.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predictions = torch.max(outputs, dim=1)

        correct += (predictions == labels).sum().item()
        total += labels.size(0)

print(f"Test Accuracy: {correct / total:.4f}")


In [None]:
# Stage 2: continue training the existing `model` (same `optimizer`,
# `criterion`, `scaler` and `device` as the previous cell) on the
# "mediumdata" file, then evaluate on its held-out split.
h5_file_path1 = "/kaggle/input/mediumdata/merged_data (2).h5"
dataset1 = SequenceDataset(h5_file_path1)

# Split dataset 80/20 (unseeded, so the partition varies between runs)
train_size1 = int(0.8 * len(dataset1))
test_size1 = len(dataset1) - train_size1
train_dataset1, test_dataset1 = random_split(dataset1, [train_size1, test_size1])

# Create DataLoader with multiple workers
train_loader1 = DataLoader(train_dataset1, batch_size=16, shuffle=True, num_workers=4, pin_memory=True)
test_loader1 = DataLoader(test_dataset1, batch_size=16, shuffle=False, num_workers=4, pin_memory=True)



epochs = 5
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for images, labels in tqdm(train_loader1, desc=f"Epoch {epoch+1}/{epochs}"):
        images, labels = images.to(device), labels.to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass with mixed precision (only when running on CUDA)
        with torch.cuda.amp.autocast(enabled=device.type == "cuda"):
            outputs = model(images)
            loss = criterion(outputs, labels)

        # Backward pass and optimization with scaler
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        total_loss += loss.item()

    # Average over THIS stage's loader; dividing by the first cell's
    # `train_loader` length reported a wrong mean loss.
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss / len(train_loader1):.4f}")

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader1:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predictions = torch.max(outputs, dim=1)

        correct += (predictions == labels).sum().item()
        total += labels.size(0)

print(f"Test Accuracy: {correct / total:.4f}")


In [None]:
# Stage 3: continue training the existing `model` (same `optimizer`,
# `criterion`, `scaler` and `device` as the earlier cells) on the
# "harddata" file, then evaluate on its held-out split.
h5_file_path2 = "/kaggle/input/harddata/merged_data (1).h5"
dataset2 = SequenceDataset(h5_file_path2)

# Split dataset 80/20 (unseeded, so the partition varies between runs)
train_size2 = int(0.8 * len(dataset2))
test_size2 = len(dataset2) - train_size2
train_dataset2, test_dataset2 = random_split(dataset2, [train_size2, test_size2])

# Create DataLoader with multiple workers
train_loader2 = DataLoader(train_dataset2, batch_size=16, shuffle=True, num_workers=4, pin_memory=True)
test_loader2 = DataLoader(test_dataset2, batch_size=16, shuffle=False, num_workers=4, pin_memory=True)



epochs = 5
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for images, labels in tqdm(train_loader2, desc=f"Epoch {epoch+1}/{epochs}"):
        images, labels = images.to(device), labels.to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass with mixed precision (only when running on CUDA)
        with torch.cuda.amp.autocast(enabled=device.type == "cuda"):
            outputs = model(images)
            loss = criterion(outputs, labels)

        # Backward pass and optimization with scaler
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        total_loss += loss.item()

    # Average over THIS stage's loader; dividing by the first cell's
    # `train_loader` length reported a wrong mean loss.
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss / len(train_loader2):.4f}")

# Evaluation loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader2:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predictions = torch.max(outputs, dim=1)

        correct += (predictions == labels).sum().item()
        total += labels.size(0)

print(f"Test Accuracy: {correct / total:.4f}")
