# In [1]:
import sys
print(sys.executable)
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms
from PIL import Image
import numpy as np
import glob

# Prefer GPU #7, but fall back gracefully so the script still starts on
# machines with fewer GPUs or with no CUDA support at all.
_PREFERRED_GPU_INDEX = 7
if torch.cuda.is_available():
    # torch.cuda.set_device() fails for an index that does not exist, so only
    # request the preferred GPU when it is present; otherwise use GPU 0.
    _gpu_index = (
        _PREFERRED_GPU_INDEX
        if torch.cuda.device_count() > _PREFERRED_GPU_INDEX
        else 0
    )
    torch.cuda.set_device(_gpu_index)
    device = torch.device(f'cuda:{_gpu_index}')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

class InterferogramDataset(Dataset):
    """Dataset of images whose regression targets are encoded in the file name.

    Each item is a pair ``(image, params)``:

    * ``image`` is the file opened with PIL and passed through
      ``ToTensor`` followed by ``Grayscale(1)``.
    * ``params`` is a float32 tensor of 8 values parsed from the file name
      (see :meth:`_parse_params`).
    """

    # Number of target values read from each file name.
    NUM_PARAMS = 8

    def __init__(self, file_paths):
        """Store the list of image paths and build the image transform.

        Args:
            file_paths: Sequence of image file paths; indexing the dataset
                indexes directly into this sequence.
        """
        self.file_paths = file_paths
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Grayscale(1)
        ])

    def __len__(self):
        """Return the number of image paths in the dataset."""
        return len(self.file_paths)

    @staticmethod
    def _parse_params(img_path):
        """Parse the target values out of an image file name.

        The base name (without extension) is split on ``'_'``. The first
        field is ignored; for each of the next up to 8 fields the leading
        character is dropped and the remainder is read as a float, after
        mapping ``'n'`` to ``'-'`` and ``'p'`` to ``'.'``. Missing trailing
        fields are left as 0.

        Args:
            img_path: Path of the image file.

        Returns:
            A float32 tensor of shape ``(8,)``.

        Raises:
            ValueError: If a field cannot be converted to ``float``.
        """
        stem = os.path.splitext(os.path.basename(img_path))[0]
        stem = stem.replace('n', '-').replace('p', '.')
        fields = stem.split('_')[1:1 + InterferogramDataset.NUM_PARAMS]

        params = np.zeros(InterferogramDataset.NUM_PARAMS, dtype=np.float32)
        for i, field in enumerate(fields):
            # field[0] is the single-letter parameter tag; the rest is the value.
            params[i] = float(field[1:])
        return torch.from_numpy(params)

    def __getitem__(self, idx):
        """Load the image at ``idx`` and return it with its parsed targets."""
        img_path = self.file_paths[idx]
        # Image.open is lazy and keeps the file open; the context manager
        # closes it as soon as the transform has read the pixel data, so
        # long-running DataLoader workers do not accumulate open handles.
        with Image.open(img_path) as img:
            image = self.transform(img)

        return image, self._parse_params(img_path)

def get_data_loaders(root_folder, batch_size=32, num_workers=8):
    """Build training and validation loaders over the JPEGs in a folder.

    All ``*.jpg`` files directly inside ``root_folder`` are wrapped in an
    ``InterferogramDataset`` and randomly split 80% / 20% into training and
    validation subsets. Images are read from disk per item by the loader
    workers rather than being loaded up front.

    Args:
        root_folder: Directory containing the ``*.jpg`` images.
        batch_size: Batch size used by both loaders.
        num_workers: Number of worker processes used by both loaders.

    Returns:
        ``(train_loader, val_loader)``; only the training loader shuffles.

    Raises:
        ValueError: If ``root_folder`` contains no ``*.jpg`` files.
    """
    # glob returns files in arbitrary filesystem order; sorting makes the
    # random split reproducible when the caller seeds torch's RNG.
    file_paths = sorted(glob.glob(os.path.join(root_folder, '*.jpg')))
    if not file_paths:
        # Fail here with a clear message instead of deep inside the
        # DataLoader / training loop.
        raise ValueError(f"No '*.jpg' files found in '{root_folder}'")
    dataset = InterferogramDataset(file_paths)

    train_size = int(0.8 * len(dataset))
    val_size = len(dataset) - train_size
    train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

    # Pinned host memory only helps host-to-GPU copies; skip it on CPU-only
    # machines.
    pin_memory = torch.cuda.is_available()

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                              num_workers=num_workers, pin_memory=pin_memory)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,
                            num_workers=num_workers, pin_memory=pin_memory)

    return train_loader, val_loader

class InterferogramNet(nn.Module):
    """CNN that regresses 8 values from a single-channel image.

    ``features`` is five stages of 3x3 Conv -> BatchNorm -> ReLU blocks, each
    stage followed by a 2x2 max pool (spatial size is halved five times).
    ``avgpool`` then resizes the feature map to 7x7 so that ``classifier``,
    a stack of fully connected layers ending in 8 outputs, always receives
    ``512 * 7 * 7`` features.

    For 224x224 inputs the feature map is already 7x7, so ``avgpool`` leaves
    it unchanged; other input sizes, which would otherwise not match the
    first linear layer, are accepted as well. ``avgpool`` has no parameters,
    so the ``state_dict`` keys are only those of ``features`` and
    ``classifier``.
    """

    # (out_channels, number of conv blocks) per stage; every stage ends with
    # a 2x2 max pool.
    _STAGES = ((32, 2), (64, 3), (128, 4), (256, 5), (512, 4))

    @staticmethod
    def _conv_block(in_channels, out_channels, kernel_size=3, stride=1, padding=1):
        """Create a Conv2d -> BatchNorm2d -> ReLU block."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def __init__(self):
        super().__init__()

        layers = []
        in_channels = 1  # single-channel (grayscale) input
        for out_channels, num_blocks in self._STAGES:
            for _ in range(num_blocks):
                layers.append(self._conv_block(in_channels, out_channels))
                in_channels = out_channels
            layers.append(nn.MaxPool2d(2, 2))
        self.features = nn.Sequential(*layers)

        # Fixes the spatial size fed to the classifier regardless of the
        # input resolution.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))

        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 1024),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Linear(16, 8)
        )

    def forward(self, x):
        """Map a batch of images ``(N, 1, H, W)`` to predictions ``(N, 8)``."""
        x = self.features(x)
        x = self.avgpool(x)
        # flatten (unlike view) does not require a contiguous tensor.
        x = torch.flatten(x, 1)
        return self.classifier(x)

def preprocess_image(filename):
    """Load an image as a grayscale float32 array scaled to [0, 1].

    Args:
        filename: Path of the image file to open with PIL.

    Returns:
        A 2-D ``numpy.float32`` array: the 8-bit grayscale ('L' mode) pixel
        values divided by 255.
    """
    # Close the underlying file deterministically; convert() returns a new,
    # fully loaded image that stays valid after the source is closed.
    with Image.open(filename) as source:
        grayscale = source.convert('L')
    return np.array(grayscale, dtype=np.float32) / 255.0

def output_function(epoch, iteration, train_loss, train_rmse, val_loss=None, val_rmse=None, 
                   time_since_start=0, learning_rate=0):
    """Print a training progress report on every 100th iteration.

    Nothing is printed unless ``iteration`` is a multiple of 100. The
    validation lines are included only when ``val_loss`` is not ``None``.
    ``time_since_start`` is given in seconds and reported in minutes.
    """
    # Guard clause: stay silent between reporting intervals.
    if iteration % 100 != 0:
        return

    has_validation = val_loss is not None
    elapsed_minutes = time_since_start / 60

    print(f'\n=== Training Progress at Iteration {iteration} ===')
    print(f'Epoch: {epoch}')
    print(f'Training Loss: {train_loss:.6f}')
    print(f'Training RMSE: {train_rmse:.6f}')
    if has_validation:
        print(f'Validation Loss: {val_loss:.6f}')
        print(f'Validation RMSE: {val_rmse:.6f}')
    print(f'Time Since Start: {elapsed_minutes:.2f} minutes')
    print(f'Current Learning Rate: {learning_rate:.6f}')
    print('===================================\n')

# Labels used when printing the 8 predicted / actual values, in output order.
_PARAM_LABELS = ('D', 'C', 'B', 'G', 'F', 'J', 'E', 'I')


def _format_params(values):
    """Format 8 values as 'D=..., C=..., ...' with 4 decimal places each."""
    return ', '.join(
        f'{label}={value:.4f}' for label, value in zip(_PARAM_LABELS, values)
    )


def _evaluate(model, loader, criterion, device):
    """Return (mean per-batch loss, mean per-batch RMSE) of model over loader.

    The model is put in eval mode for the pass and restored to its previous
    mode afterwards. Returns ``(nan, nan)`` when the loader yields no batches.
    """
    num_batches = len(loader)
    if num_batches == 0:
        return float('nan'), float('nan')

    was_training = model.training
    model.eval()
    total_loss = 0.0
    total_rmse = 0.0
    with torch.no_grad():
        for images, targets in loader:
            images, targets = images.to(device), targets.to(device)
            batch_loss = criterion(model(images), targets)
            total_loss += batch_loss.item()
            total_rmse += torch.sqrt(batch_loss).item()
    model.train(was_training)

    return total_loss / num_batches, total_rmse / num_batches


def main():
    """Train InterferogramNet, save its weights, and print sample predictions.

    Loads the images under ``'training3'``, trains with MSE loss and Adam
    for up to 10 epochs, validating every 5 iterations, then saves the
    model's ``state_dict`` to ``models/trained_network.pth`` and prints
    predicted vs. actual values for up to 5 validation samples.
    """
    import time  # only main() needs wall-clock timing

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f" Using device: {device}")

    train_loader, val_loader = get_data_loaders('training3', batch_size=400, num_workers=4)

    model = InterferogramNet().to(device)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    num_epochs = 10
    validate_every = 5   # iterations between validation passes
    patience = 1000      # validation passes without improvement before stopping
    iteration = 0
    best_val_loss = float('inf')
    patience_counter = 0
    stop_training = False
    start_time = time.time()

    for epoch in range(num_epochs):
        model.train()
        for images, targets in train_loader:
            images, targets = images.to(device), targets.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            iteration += 1
            if iteration % validate_every != 0:
                continue

            val_loss, val_rmse = _evaluate(model, val_loader, criterion, device)

            if val_loss < best_val_loss:
                best_val_loss = val_loss
                patience_counter = 0
            else:
                patience_counter += 1
                if patience_counter >= patience:
                    print("Early stopping triggered")
                    # A bare break only leaves the batch loop; the flag makes
                    # the epoch loop stop as well.
                    stop_training = True
                    break

            train_loss = loss.detach()
            output_function(epoch, iteration, train_loss.item(), torch.sqrt(train_loss).item(),
                            val_loss, val_rmse,
                            time.time() - start_time,  # real elapsed seconds
                            optimizer.param_groups[0]['lr'])

        if stop_training:
            break

    save_dir = 'models'
    os.makedirs(save_dir, exist_ok=True)
    model_path = os.path.join(save_dir, 'trained_network.pth')
    torch.save(model.state_dict(), model_path)
    print(f'Model saved as: {model_path}')

    print('\n=== Testing Some Predictions ===')
    model.eval()
    with torch.no_grad():
        test_images, actual_params = next(iter(val_loader))
        test_images, actual_params = test_images.to(device), actual_params.to(device)
        predictions = model(test_images)

        for i in range(min(5, predictions.size(0))):
            errors = torch.abs(predictions[i] - actual_params[i])
            print(f'\nSample {i+1}:')
            print(f'Predicted: {_format_params(predictions[i])}')
            print(f'Actual:    {_format_params(actual_params[i])}')
            print(f'Abs Error: {_format_params(errors)}')

# Run training only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()

# Cell output:
# /opt/miniforge3/bin/python
#
# ModuleNotFoundError: No module named 'torchvision'