In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split

# Check GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Define a simple model
class SimpleModel(nn.Module):
    def __init__(self, input_size, num_classes):
        super(SimpleModel, self).__init__()
        self.fc = nn.Linear(input_size, num_classes)

    def forward(self, x):
        return self.fc(x)

# Create a large synthetic dataset
num_samples, input_size, num_classes = 1_000_000, 100, 10
X = torch.randn(num_samples, input_size)
y = torch.randint(0, num_classes, (num_samples,))

# Split into training and validation sets (80% train, 20% test)
train_size = int(0.8 * num_samples)
val_size = num_samples - train_size
train_dataset, val_dataset = random_split(TensorDataset(X, y), [train_size, val_size])

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=4096, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=2048, shuffle=False, num_workers=2, pin_memory=True)

# Initialize model and use DataParallel if multiple GPUs are available
model = SimpleModel(input_size, num_classes).to(device)
if torch.cuda.device_count() > 1:
    print(f"Using {torch.cuda.device_count()} GPUs with DataParallel!")
    model = nn.DataParallel(model)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
epochs = 5
for epoch in range(epochs):
    model.train()
    total_loss = 0.0
    for batch_x, batch_y in train_loader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)

        optimizer.zero_grad()
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    avg_train_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{epochs}, Training Loss: {avg_train_loss:.4f}")

# Save the trained model
torch.save(model.state_dict(), "model_colab.pth")
print("Model saved successfully!")

# Evaluation function
def evaluate(model, val_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_x, batch_y in val_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)

            outputs = model(batch_x)
            _, predicted = torch.max(outputs, 1)  # Get class with highest probability
            correct += (predicted == batch_y).sum().item()
            total += batch_y.size(0)

    accuracy = 100 * correct / total
    print(f"Validation Accuracy: {accuracy:.2f}%")

# Load model for evaluation
model.load_state_dict(torch.load("model_colab.pth"))
evaluate(model, val_loader)


Using device: cpu
Epoch 1/5, Training Loss: 2.3306
Epoch 2/5, Training Loss: 2.3036
Epoch 3/5, Training Loss: 2.3035
Epoch 4/5, Training Loss: 2.3036
Epoch 5/5, Training Loss: 2.3036
Model saved successfully!


  model.load_state_dict(torch.load("model_colab.pth"))


Validation Accuracy: 9.92%


In [25]:
!git add /content/drive/MyDrive/Colab Notebooks/scale_ML_prototype.ipynb

fatal: /content/drive/MyDrive/Colab: '/content/drive/MyDrive/Colab' is outside repository at '/content/scale_ML_prototype'


In [26]:
!find / -name ".git" 2>/dev/null


/root/.cache/uv/sdists-v7/.git
/content/scale_ML_prototype/.git


# New Section

In [30]:
!ls -lah /content/scale_ML_prototype/
!find / -name "scale_ML_prototype.ipynb" 2>/dev/null


total 16K
drwxr-xr-x 3 root root 4.0K Mar  3 02:50 .
drwxr-xr-x 1 root root 4.0K Mar  3 03:16 ..
drwxr-xr-x 8 root root 4.0K Mar  3 03:26 .git
-rw-r--r-- 1 root root   20 Mar  3 02:50 README.md
/content/drive/MyDrive/Colab Notebooks/scale_ML_prototype.ipynb
