In [None]:
import torch
from torch.utils.data import Dataset, DataLoader, random_split
import h5py
from transformers import ViTForImageClassification, ViTConfig
import torch.nn as nn
import torch
from torch.utils.data import Dataset, DataLoader
import h5py

# Define the SequenceDataset class
class SequenceDataset(Dataset):
    """Map-style dataset serving ``(image, label)`` pairs from an HDF5 file.

    The file must contain an ``"images"`` dataset and a ``"labels"`` dataset.
    Samples are read from the open file on every access, so the file handle
    stays open until :meth:`close` is called (or the ``with`` block that
    wraps the dataset exits).
    """

    def __init__(self, h5_file):
        """Open ``h5_file`` read-only and keep handles to its datasets."""
        self.h5_file = h5py.File(h5_file, "r")
        self.images = self.h5_file["images"]  # Shape: (N, 32, 32)
        self.labels = self.h5_file["labels"]  # Shape: (N,)

    def __len__(self):
        """Return the number of samples (one per label)."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return sample ``idx`` as ``(image, label)`` tensors.

        The image is converted to ``float32`` and gets a leading axis of
        size 1 added in front of its stored shape; the label is converted
        to ``long``.
        """
        image = torch.tensor(self.images[idx], dtype=torch.float32).unsqueeze(0)
        label = torch.tensor(self.labels[idx], dtype=torch.long)
        return image, label

    def close(self):
        """Close the underlying HDF5 file; safe to call more than once.

        After closing, ``images`` and ``labels`` still refer to datasets of
        the closed file and must not be indexed any more.
        """
        handle = getattr(self, "h5_file", None)
        if handle is not None:
            handle.close()
            self.h5_file = None

    def __enter__(self):
        """Allow ``with SequenceDataset(path) as ds:`` usage."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Release the file handle when the ``with`` block ends."""
        self.close()
# ---- Easy dataset: open the file, split it 80/20, build the loaders ----
h5_file_path1 = '/kaggle/input/easydata/merged_data (3).h5'
dataset1 = SequenceDataset(h5_file_path1)

# 80% of the samples go to training; whatever remains is the test split,
# so the two sizes always add up to the full dataset length.
train_size1 = int(0.8 * len(dataset1))
test_size1 = len(dataset1) - train_size1
train_dataset1, test_dataset1 = random_split(
    dataset1,
    [train_size1, test_size1],
)

# Only the training batches are shuffled.
train_loader1 = DataLoader(
    train_dataset1,
    batch_size=16,
    shuffle=True,
)
test_loader1 = DataLoader(
    test_dataset1,
    batch_size=16,
    shuffle=False,
)

print("data loaded succ")

# Small ViT for 32x32 single-channel inputs and two output classes.
# NOTE: with patch_size=1 every pixel becomes its own patch, i.e.
# 32 * 32 = 1024 patches per image.
config = ViTConfig(
    # Input geometry
    image_size=32,
    patch_size=1,
    num_channels=1,
    # Classification head
    num_labels=2,
    # Transformer encoder dimensions
    hidden_size=256,
    num_hidden_layers=6,
    num_attention_heads=8,
    intermediate_size=512,
)

# Build the classifier from the configuration above.
model = ViTForImageClassification(config)

# AdamW over all model parameters; cross-entropy on the logits.
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)
criterion = nn.CrossEntropyLoss()

# Training function
def train_epoch(model, dataloader, optimizer, device, loss_fn=None):
    """Run one optimisation pass over ``dataloader`` and report metrics.

    Args:
        model: Module called as ``model(pixel_values=images)``; its output
            must expose a ``logits`` attribute.
        dataloader: Iterable yielding ``(images, labels)`` tensor batches.
        optimizer: Optimizer stepped once per batch.
        device: Device each batch is moved to before the forward pass.
        loss_fn: Callable invoked as ``loss_fn(logits, labels)``. When
            omitted, the module-level ``criterion`` is used, so existing
            four-argument calls behave as before.

    Returns:
        ``(avg_loss, accuracy)``: the mean of the per-batch losses and the
        fraction of correctly classified samples. Both are ``0.0`` when the
        dataloader yields nothing, instead of raising ZeroDivisionError.
    """
    if loss_fn is None:
        loss_fn = criterion

    model.train()
    total_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0

    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        logits = model(pixel_values=images).logits
        loss = loss_fn(logits, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Update metrics
        total_loss += loss.item()
        preds = logits.argmax(dim=1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
        num_batches += 1

    avg_loss = total_loss / num_batches if num_batches else 0.0
    accuracy = correct / total if total else 0.0
    return avg_loss, accuracy

# Evaluation function
def evaluate(model, dataloader, device):
    """Compute classification accuracy of ``model`` over ``dataloader``.

    The model is switched to eval mode and gradients are disabled for the
    whole pass.

    Args:
        model: Module called as ``model(pixel_values=images)``; its output
            must expose a ``logits`` attribute.
        dataloader: Iterable yielding ``(images, labels)`` tensor batches.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Fraction of samples whose highest-scoring class equals the label,
        or ``0.0`` when the dataloader yields no samples (instead of
        raising ZeroDivisionError).
    """
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)

            # Forward pass; the predicted class is the largest logit.
            logits = model(pixel_values=images).logits
            preds = logits.argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    return correct / total if total else 0.0
# Use the GPU when one is available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Easy-data stage: after every epoch report the training loss/accuracy
# and the accuracy on the held-out split.
num_epochs = 5
for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}/{num_epochs}")

    train_loss, train_acc = train_epoch(
        model=model,
        dataloader=train_loader1,
        optimizer=optimizer,
        device=device,
    )
    print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.4f}")

    test_acc = evaluate(model=model, dataloader=test_loader1, device=device)
    print(f"Test Accuracy: {test_acc:.4f}")

In [None]:
# ---- Medium dataset: open the file, split it 80/20, build the loaders ----
h5_file_path2 = '/kaggle/input/mediumdata/merged_data (2).h5'  # Update the path if needed
dataset2 = SequenceDataset(h5_file_path2)

# 80% of the samples go to training; whatever remains is the test split.
train_size2 = int(0.8 * len(dataset2))
test_size2 = len(dataset2) - train_size2
train_dataset2, test_dataset2 = random_split(
    dataset2,
    [train_size2, test_size2],
)

# Only the training batches are shuffled.
train_loader2 = DataLoader(
    train_dataset2,
    batch_size=16,
    shuffle=True,
)
test_loader2 = DataLoader(
    test_dataset2,
    batch_size=16,
    shuffle=False,
)

print(" medi data loaded succ")


# Training function
def train_epoch(model, dataloader, optimizer, device, loss_fn=None):
    """Run one optimisation pass over ``dataloader`` and report metrics.

    Args:
        model: Module called as ``model(pixel_values=images)``; its output
            must expose a ``logits`` attribute.
        dataloader: Iterable yielding ``(images, labels)`` tensor batches.
        optimizer: Optimizer stepped once per batch.
        device: Device each batch is moved to before the forward pass.
        loss_fn: Callable invoked as ``loss_fn(logits, labels)``. When
            omitted, the module-level ``criterion`` is used, so existing
            four-argument calls behave as before.

    Returns:
        ``(avg_loss, accuracy)``: the mean of the per-batch losses and the
        fraction of correctly classified samples. Both are ``0.0`` when the
        dataloader yields nothing, instead of raising ZeroDivisionError.
    """
    if loss_fn is None:
        loss_fn = criterion

    model.train()
    total_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0

    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        logits = model(pixel_values=images).logits
        loss = loss_fn(logits, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Update metrics
        total_loss += loss.item()
        preds = logits.argmax(dim=1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
        num_batches += 1

    avg_loss = total_loss / num_batches if num_batches else 0.0
    accuracy = correct / total if total else 0.0
    return avg_loss, accuracy

# Evaluation function
def evaluate(model, dataloader, device):
    """Compute classification accuracy of ``model`` over ``dataloader``.

    The model is switched to eval mode and gradients are disabled for the
    whole pass.

    Args:
        model: Module called as ``model(pixel_values=images)``; its output
            must expose a ``logits`` attribute.
        dataloader: Iterable yielding ``(images, labels)`` tensor batches.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Fraction of samples whose highest-scoring class equals the label,
        or ``0.0`` when the dataloader yields no samples (instead of
        raising ZeroDivisionError).
    """
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)

            # Forward pass; the predicted class is the largest logit.
            logits = model(pixel_values=images).logits
            preds = logits.argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    return correct / total if total else 0.0
# Use the GPU when one is available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Medium-data stage: the same model and optimizer objects keep training,
# now on the medium loaders, with per-epoch reporting.
num_epochs = 5
for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}/{num_epochs}")

    train_loss, train_acc = train_epoch(
        model=model,
        dataloader=train_loader2,
        optimizer=optimizer,
        device=device,
    )
    print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.4f}")

    test_acc = evaluate(model=model, dataloader=test_loader2, device=device)
    print(f"Test Accuracy: {test_acc:.4f}")

In [None]:
# ---- Hard dataset: open the file, split it 80/20, build the loaders ----
# Everything in this stage uses the `3` suffix so the medium-stage objects
# are not overwritten and the training loop for this stage finds
# `train_loader3` / `test_loader3`.
h5_file_path3 = '/kaggle/input/harddata/merged_data (1).h5'  # Update the path if needed
dataset3 = SequenceDataset(h5_file_path3)

# Split dataset into training and testing sets (80% for training, 20% for testing)
train_size3 = int(0.8 * len(dataset3))
test_size3 = len(dataset3) - train_size3
train_dataset3, test_dataset3 = random_split(dataset3, [train_size3, test_size3])

# Create DataLoaders for batching
train_loader3 = DataLoader(train_dataset3, batch_size=16, shuffle=True)
test_loader3 = DataLoader(test_dataset3, batch_size=16, shuffle=False)

print("hard data loaded succ")

# Training function
def train_epoch(model, dataloader, optimizer, device, loss_fn=None):
    """Run one optimisation pass over ``dataloader`` and report metrics.

    Args:
        model: Module called as ``model(pixel_values=images)``; its output
            must expose a ``logits`` attribute.
        dataloader: Iterable yielding ``(images, labels)`` tensor batches.
        optimizer: Optimizer stepped once per batch.
        device: Device each batch is moved to before the forward pass.
        loss_fn: Callable invoked as ``loss_fn(logits, labels)``. When
            omitted, the module-level ``criterion`` is used, so existing
            four-argument calls behave as before.

    Returns:
        ``(avg_loss, accuracy)``: the mean of the per-batch losses and the
        fraction of correctly classified samples. Both are ``0.0`` when the
        dataloader yields nothing, instead of raising ZeroDivisionError.
    """
    if loss_fn is None:
        loss_fn = criterion

    model.train()
    total_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0

    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        logits = model(pixel_values=images).logits
        loss = loss_fn(logits, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Update metrics
        total_loss += loss.item()
        preds = logits.argmax(dim=1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
        num_batches += 1

    avg_loss = total_loss / num_batches if num_batches else 0.0
    accuracy = correct / total if total else 0.0
    return avg_loss, accuracy

# Evaluation function
def evaluate(model, dataloader, device):
    """Compute classification accuracy of ``model`` over ``dataloader``.

    The model is switched to eval mode and gradients are disabled for the
    whole pass.

    Args:
        model: Module called as ``model(pixel_values=images)``; its output
            must expose a ``logits`` attribute.
        dataloader: Iterable yielding ``(images, labels)`` tensor batches.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Fraction of samples whose highest-scoring class equals the label,
        or ``0.0`` when the dataloader yields no samples (instead of
        raising ZeroDivisionError).
    """
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)

            # Forward pass; the predicted class is the largest logit.
            logits = model(pixel_values=images).logits
            preds = logits.argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    return correct / total if total else 0.0
# Use the GPU when one is available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Hard-data stage: the same model and optimizer objects keep training on
# train_loader3, with accuracy measured on test_loader3 after every epoch.
num_epochs = 5
for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}/{num_epochs}")

    train_loss, train_acc = train_epoch(
        model=model,
        dataloader=train_loader3,
        optimizer=optimizer,
        device=device,
    )
    print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.4f}")

    test_acc = evaluate(model=model, dataloader=test_loader3, device=device)
    print(f"Test Accuracy: {test_acc:.4f}")